diff --git a/clang/include/clang/Basic/ObjCRuntime.h b/clang/include/clang/Basic/ObjCRuntime.h --- a/clang/include/clang/Basic/ObjCRuntime.h +++ b/clang/include/clang/Basic/ObjCRuntime.h @@ -480,6 +480,9 @@ friend llvm::hash_code hash_value(const ObjCRuntime &OCR) { return llvm::hash_combine(OCR.getKind(), OCR.getVersion()); } + friend llvm::stable_hash_code stable_hash_value(const ObjCRuntime &OCR) { + return llvm::stable_hash_combine(OCR.getKind(), OCR.getVersion()); + } }; raw_ostream &operator<<(raw_ostream &out, const ObjCRuntime &value); diff --git a/clang/include/clang/Basic/Sanitizers.h b/clang/include/clang/Basic/Sanitizers.h --- a/clang/include/clang/Basic/Sanitizers.h +++ b/clang/include/clang/Basic/Sanitizers.h @@ -77,6 +77,7 @@ } llvm::hash_code hash_value() const; + llvm::stable_hash_code stable_hash_value() const; constexpr explicit operator bool() const { return maskLoToHigh[0] || maskLoToHigh[1]; @@ -124,6 +125,7 @@ // Declaring in clang namespace so that it can be found by ADL. llvm::hash_code hash_value(const clang::SanitizerMask &Arg); +llvm::stable_hash_code stable_hash_value(const clang::SanitizerMask &Arg); // Define the set of sanitizer kinds, as well as the set of sanitizers each // sanitizer group expands into. diff --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h --- a/clang/include/clang/Lex/HeaderSearchOptions.h +++ b/clang/include/clang/Lex/HeaderSearchOptions.h @@ -256,11 +256,22 @@ return llvm::hash_combine(E.Path, E.Group, E.IsFramework, E.IgnoreSysRoot); } +inline llvm::stable_hash_code +stable_hash_value(const HeaderSearchOptions::Entry &E) { + return llvm::stable_hash_combine(E.Path, E.Group, E.IsFramework, + E.IgnoreSysRoot); +} + inline llvm::hash_code hash_value(const HeaderSearchOptions::SystemHeaderPrefix &SHP) { return llvm::hash_combine(SHP.Prefix, SHP.IsSystemHeader); } +inline llvm::stable_hash_code +stable_hash_value(const HeaderSearchOptions::SystemHeaderPrefix &SHP) { + return llvm::stable_hash_combine(SHP.Prefix, SHP.IsSystemHeader); +} + } // namespace clang #endif // LLVM_CLANG_LEX_HEADERSEARCHOPTIONS_H diff --git a/clang/include/clang/Serialization/ModuleFileExtension.h b/clang/include/clang/Serialization/ModuleFileExtension.h --- a/clang/include/clang/Serialization/ModuleFileExtension.h +++ b/clang/include/clang/Serialization/ModuleFileExtension.h @@ -17,7 +17,7 @@ namespace llvm { class BitstreamCursor; class BitstreamWriter; -class hash_code; +class stable_hash_code; class raw_ostream; } @@ -86,7 +86,7 @@ /// The default implementation of this function simply returns the /// hash code as given, so the presence/absence of this extension /// does not distinguish module files. - virtual llvm::hash_code hashExtension(llvm::hash_code c) const; + virtual llvm::stable_hash_code hashExtension(llvm::stable_hash_code c) const; /// Create a new module file extension writer, which will be /// responsible for writing the extension contents into a particular diff --git a/clang/lib/Basic/Sanitizers.cpp b/clang/lib/Basic/Sanitizers.cpp --- a/clang/lib/Basic/Sanitizers.cpp +++ b/clang/lib/Basic/Sanitizers.cpp @@ -56,9 +56,14 @@ return llvm::hash_combine_range(&maskLoToHigh[0], &maskLoToHigh[kNumElem]); } +llvm::stable_hash_code SanitizerMask::stable_hash_value() const { + return llvm::stable_hash_combine_range(&maskLoToHigh[0], + &maskLoToHigh[kNumElem]); +} + namespace clang { -llvm::hash_code hash_value(const clang::SanitizerMask &Arg) { - return Arg.hash_value(); +llvm::stable_hash_code stable_hash_value(const clang::SanitizerMask &Arg) { + return Arg.stable_hash_value(); } StringRef AsanDtorKindToString(llvm::AsanDtorKind kind) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4464,47 +4464,49 @@ std::string CompilerInvocation::getModuleHash() const { // Note: For QoI reasons, the things we use as a hash here should all be // dumped via the -module-info flag. - using llvm::hash_code; - using llvm::hash_value; - using llvm::hash_combine; - using llvm::hash_combine_range; + using llvm::stable_hash_code; + using llvm::stable_hash_combine; + using llvm::stable_hash_combine_range; + using llvm::stable_hash_value; // Start the signature with the compiler version. // FIXME: We'd rather use something more cryptographically sound than // CityHash, but this will do for now. - hash_code code = hash_value(getClangFullRepositoryVersion()); + stable_hash_code code = stable_hash_value(getClangFullRepositoryVersion()); // Also include the serialization version, in case LLVM_APPEND_VC_REV is off // and getClangFullRepositoryVersion() doesn't include git revision. - code = hash_combine(code, serialization::VERSION_MAJOR, - serialization::VERSION_MINOR); + code = stable_hash_combine(code, serialization::VERSION_MAJOR, + serialization::VERSION_MINOR); // Extend the signature with the language options -#define LANGOPT(Name, Bits, Default, Description) \ - code = hash_combine(code, LangOpts->Name); -#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \ - code = hash_combine(code, static_cast(LangOpts->get##Name())); +#define LANGOPT(Name, Bits, Default, Description) \ + code = stable_hash_combine(code, LangOpts->Name); +#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \ + code = \ + stable_hash_combine(code, static_cast(LangOpts->get##Name())); #define BENIGN_LANGOPT(Name, Bits, Default, Description) #define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description) #include "clang/Basic/LangOptions.def" for (StringRef Feature : LangOpts->ModuleFeatures) - code = hash_combine(code, Feature); + code = stable_hash_combine(code, Feature); - code = hash_combine(code, LangOpts->ObjCRuntime); + code = stable_hash_combine(code, LangOpts->ObjCRuntime); const auto &BCN = LangOpts->CommentOpts.BlockCommandNames; - code = hash_combine(code, hash_combine_range(BCN.begin(), BCN.end())); + code = stable_hash_combine(code, + stable_hash_combine_range(BCN.begin(), BCN.end())); // Extend the signature with the target options. - code = hash_combine(code, TargetOpts->Triple, TargetOpts->CPU, - TargetOpts->TuneCPU, TargetOpts->ABI); + code = stable_hash_combine(code, TargetOpts->Triple, TargetOpts->CPU, + TargetOpts->TuneCPU, TargetOpts->ABI); for (const auto &FeatureAsWritten : TargetOpts->FeaturesAsWritten) - code = hash_combine(code, FeatureAsWritten); + code = stable_hash_combine(code, FeatureAsWritten); // Extend the signature with preprocessor options. const PreprocessorOptions &ppOpts = getPreprocessorOpts(); const HeaderSearchOptions &hsOpts = getHeaderSearchOpts(); - code = hash_combine(code, ppOpts.UsePredefines, ppOpts.DetailedRecord); + code = stable_hash_combine(code, ppOpts.UsePredefines, ppOpts.DetailedRecord); for (const auto &I : getPreprocessorOpts().Macros) { // If we're supposed to ignore this macro for the purposes of modules, @@ -4517,40 +4519,37 @@ continue; } - code = hash_combine(code, I.first, I.second); + code = stable_hash_combine(code, I.first, I.second); } // Extend the signature with the sysroot and other header search options. - code = hash_combine(code, hsOpts.Sysroot, - hsOpts.ModuleFormat, - hsOpts.UseDebugInfo, - hsOpts.UseBuiltinIncludes, - hsOpts.UseStandardSystemIncludes, - hsOpts.UseStandardCXXIncludes, - hsOpts.UseLibcxx, - hsOpts.ModulesValidateDiagnosticOptions); - code = hash_combine(code, hsOpts.ResourceDir); + code = stable_hash_combine(code, hsOpts.Sysroot, hsOpts.ModuleFormat, + hsOpts.UseDebugInfo, hsOpts.UseBuiltinIncludes, + hsOpts.UseStandardSystemIncludes, + hsOpts.UseStandardCXXIncludes, hsOpts.UseLibcxx, + hsOpts.ModulesValidateDiagnosticOptions); + code = stable_hash_combine(code, hsOpts.ResourceDir); if (hsOpts.ModulesStrictContextHash) { - hash_code SHPC = hash_combine_range(hsOpts.SystemHeaderPrefixes.begin(), - hsOpts.SystemHeaderPrefixes.end()); - hash_code UEC = hash_combine_range(hsOpts.UserEntries.begin(), - hsOpts.UserEntries.end()); - code = hash_combine(code, hsOpts.SystemHeaderPrefixes.size(), SHPC, - hsOpts.UserEntries.size(), UEC); + stable_hash_code SHPC = stable_hash_combine_range( + hsOpts.SystemHeaderPrefixes.begin(), hsOpts.SystemHeaderPrefixes.end()); + stable_hash_code UEC = stable_hash_combine_range(hsOpts.UserEntries.begin(), + hsOpts.UserEntries.end()); + code = stable_hash_combine(code, hsOpts.SystemHeaderPrefixes.size(), SHPC, + hsOpts.UserEntries.size(), UEC); const DiagnosticOptions &diagOpts = getDiagnosticOpts(); - #define DIAGOPT(Name, Bits, Default) \ - code = hash_combine(code, diagOpts.Name); - #define ENUM_DIAGOPT(Name, Type, Bits, Default) \ - code = hash_combine(code, diagOpts.get##Name()); - #include "clang/Basic/DiagnosticOptions.def" - #undef DIAGOPT - #undef ENUM_DIAGOPT +#define DIAGOPT(Name, Bits, Default) \ + code = stable_hash_combine(code, diagOpts.Name); +#define ENUM_DIAGOPT(Name, Type, Bits, Default) \ + code = stable_hash_combine(code, diagOpts.get##Name()); +#include "clang/Basic/DiagnosticOptions.def" +#undef DIAGOPT +#undef ENUM_DIAGOPT } // Extend the signature with the user build path. - code = hash_combine(code, hsOpts.ModuleUserBuildPath); + code = stable_hash_combine(code, hsOpts.ModuleUserBuildPath); // Extend the signature with the module file extensions. const FrontendOptions &frontendOpts = getFrontendOpts(); @@ -4562,14 +4561,14 @@ // affects the debug info in the PCM. if (getCodeGenOpts().DebugTypeExtRefs) for (const auto &KeyValue : getCodeGenOpts().DebugPrefixMap) - code = hash_combine(code, KeyValue.first, KeyValue.second); + code = stable_hash_combine(code, KeyValue.first, KeyValue.second); // Extend the signature with the enabled sanitizers, if at least one is // enabled. Sanitizers which cannot affect AST generation aren't hashed. SanitizerSet SanHash = LangOpts->Sanitize; SanHash.clear(getPPTransparentSanitizers()); if (!SanHash.empty()) - code = hash_combine(code, SanHash.Mask); + code = stable_hash_combine(code, SanHash.Mask); return llvm::APInt(64, code).toString(36, /*Signed=*/false); } diff --git a/clang/lib/Frontend/TestModuleFileExtension.h b/clang/lib/Frontend/TestModuleFileExtension.h --- a/clang/lib/Frontend/TestModuleFileExtension.h +++ b/clang/lib/Frontend/TestModuleFileExtension.h @@ -55,7 +55,8 @@ ModuleFileExtensionMetadata getExtensionMetadata() const override; - llvm::hash_code hashExtension(llvm::hash_code Code) const override; + llvm::stable_hash_code + hashExtension(llvm::stable_hash_code Code) const override; std::unique_ptr createExtensionWriter(ASTWriter &Writer) override; diff --git a/clang/lib/Frontend/TestModuleFileExtension.cpp b/clang/lib/Frontend/TestModuleFileExtension.cpp --- a/clang/lib/Frontend/TestModuleFileExtension.cpp +++ b/clang/lib/Frontend/TestModuleFileExtension.cpp @@ -93,13 +93,13 @@ return { BlockName, MajorVersion, MinorVersion, UserInfo }; } -llvm::hash_code TestModuleFileExtension::hashExtension( - llvm::hash_code Code) const { +llvm::stable_hash_code +TestModuleFileExtension::hashExtension(llvm::stable_hash_code Code) const { if (Hashed) { - Code = llvm::hash_combine(Code, BlockName); - Code = llvm::hash_combine(Code, MajorVersion); - Code = llvm::hash_combine(Code, MinorVersion); - Code = llvm::hash_combine(Code, UserInfo); + Code = llvm::stable_hash_combine(Code, BlockName); + Code = llvm::stable_hash_combine(Code, MajorVersion); + Code = llvm::stable_hash_combine(Code, MinorVersion); + Code = llvm::stable_hash_combine(Code, UserInfo); } return Code; diff --git a/clang/lib/Serialization/ModuleFileExtension.cpp b/clang/lib/Serialization/ModuleFileExtension.cpp --- a/clang/lib/Serialization/ModuleFileExtension.cpp +++ b/clang/lib/Serialization/ModuleFileExtension.cpp @@ -13,7 +13,8 @@ ModuleFileExtension::~ModuleFileExtension() { } -llvm::hash_code ModuleFileExtension::hashExtension(llvm::hash_code Code) const { +llvm::stable_hash_code +ModuleFileExtension::hashExtension(llvm::stable_hash_code Code) const { return Code; } diff --git a/clang/unittests/Frontend/CompilerInvocationTest.cpp b/clang/unittests/Frontend/CompilerInvocationTest.cpp --- a/clang/unittests/Frontend/CompilerInvocationTest.cpp +++ b/clang/unittests/Frontend/CompilerInvocationTest.cpp @@ -860,7 +860,8 @@ return {}; }; - llvm::hash_code hashExtension(llvm::hash_code Code) const override { + llvm::stable_hash_code + hashExtension(llvm::stable_hash_code Code) const override { return {}; } diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -57,6 +57,36 @@ namespace llvm { +template class hash_code_impl { +protected: + StorageT value; + +public: + /// Default construct a hash_code_impl. + /// Note that this leaves the value uninitialized. + hash_code_impl() = default; + + /// Form a hash code directly from a numerical value. + hash_code_impl(StorageT value) : value(value) {} + + /// Convert the hash code to its numerical value for use. + /*explicit*/ operator StorageT() const { return value; } + + friend bool operator==(const hash_code_impl &lhs, const hash_code_impl &rhs) { + return lhs.value == rhs.value; + } + friend bool operator!=(const hash_code_impl &lhs, const hash_code_impl &rhs) { + return lhs.value != rhs.value; + } + + /// Allow a hash_code_impl to be directly run through hash_value. + friend StorageT hash_value(const hash_code_impl &code) { return code.value; } + /// Allow a hash_code_impl to be directly run through stable_hash_value. + friend StorageT stable_hash_value(const hash_code_impl &code) { + return code.value; + } +}; + /// An opaque object representing a hash code. /// /// This object represents the result of hashing some entity. It is intended to @@ -69,29 +99,14 @@ /// using llvm::hash_value; /// llvm::hash_code code = hash_value(x); /// \endcode -class hash_code { - size_t value; - +class hash_code : public hash_code_impl { public: /// Default construct a hash_code. /// Note that this leaves the value uninitialized. hash_code() = default; /// Form a hash code directly from a numerical value. - hash_code(size_t value) : value(value) {} - - /// Convert the hash code to its numerical value for use. - /*explicit*/ operator size_t() const { return value; } - - friend bool operator==(const hash_code &lhs, const hash_code &rhs) { - return lhs.value == rhs.value; - } - friend bool operator!=(const hash_code &lhs, const hash_code &rhs) { - return lhs.value != rhs.value; - } - - /// Allow a hash_code to be directly run through hash_value. - friend size_t hash_value(const hash_code &code) { return code.value; } + hash_code(size_t value) : hash_code_impl(value) {} }; /// Compute a hash_code for any integer value. @@ -121,6 +136,27 @@ template hash_code hash_value(const std::basic_string &arg); +/// Combine values into a single hash_code. +/// +/// This routine accepts a varying number of arguments of any type. It will +/// attempt to combine them into a single hash_code. For user-defined types it +/// attempts to call a \see hash_value overload (via ADL) for the type. For +/// integer and pointer types it directly combines their data into the +/// resulting hash_code. +/// +/// The result is suitable for returning from a user's hash_value +/// *implementation* for their user-defined type. Consumers of a type should +/// *not* call this routine, they should instead call 'hash_value'. +template hash_code hash_combine(const Ts &...args); + +/// Compute a hash_code for a sequence of values. +/// +/// This hashes a sequence of values. It produces the same hash_code as +/// 'hash_combine(a, b, c, ...)', but can run over arbitrary sized sequences +/// and is significantly faster given pointers and types which can be hashed as +/// a sequence of bytes. +template +hash_code hash_combine_range(InputIteratorT first, InputIteratorT last); /// Override the execution seed with a fixed value. /// @@ -138,6 +174,93 @@ /// behavior. void set_fixed_execution_hash_seed(uint64_t fixed_value); +//===----------------------------------------------------------------------===// +// +// `stable_hash_code` is a trivial variation of `hash_code`. It works and is +// used in the same way. But for a given input the value of `stable_hash_code` +// is guaranteed to be the same across different executions of the program or +// across platforms. +// +// One type and three functions are provided: +// - `stable_hash_code` +// - `stable_hash_value` +// - `stable_hash_combine` and `stable_hash_combine_range` +// +//===----------------------------------------------------------------------===// + +/// An opaque object representing a stable hash code. +/// +/// This object represents the result of hashing some entity. It is intended to +/// be used to implement hashtables or other hashing-based data structures, when +/// the hash value needs to be stable across processes, executions, or +/// platforms. +/// +/// In order to obtain the stable_hash_code for an object 'x': +/// \code +/// using llvm::stable_hash_value; +/// llvm::stable_hash_code code = stable_hash_value(x); +/// \endcode +class stable_hash_code : public hash_code_impl { +public: + /// Default construct a stable_hash_code. + /// Note that this leaves the value uninitialized. + stable_hash_code() = default; + + /// Form a hash code directly from a numerical value. + stable_hash_code(uint64_t value) : hash_code_impl(value) {} +}; + +/// Compute a stable_hash_code for any integer value. +/// +/// Note that this function is intended to compute the same stable_hash_code for +/// a particular value without regard to the pre-promotion type. This is in +/// contrast to stable_hash_combine which may produce different +/// stable_hash_codes for differing argument types even if they would implicit +/// promote to a common type without changing the value. +template +std::enable_if_t::value, stable_hash_code> +stable_hash_value(T value); + +/// Compute a stable_hash_code for a pointer's address. +/// +/// N.B.: This hashes the *address*. Not the value and not the type. +template stable_hash_code stable_hash_value(const T *ptr); + +/// Compute a stable_hash_code for a pair of objects. +template +stable_hash_code stable_hash_value(const std::pair &arg); + +/// Compute a stable_hash_code for a tuple. +template +stable_hash_code stable_hash_value(const std::tuple &arg); + +/// Compute a stable_hash_code for a standard string. +template +stable_hash_code stable_hash_value(const std::basic_string &arg); + +/// Combine values into a single stable_hash_code. +/// +/// This routine accepts a varying number of arguments of any type. It will +/// attempt to combine them into a single stable_hash_code. For user-defined +/// types it attempts to call a \see stable_hash_value overload (via ADL) for +/// the type. For integer and pointer types it directly combines their data into +/// the resulting stable_hash_code. +/// +/// The result is suitable for returning from a user's stable_hash_value +/// *implementation* for their user-defined type. Consumers of a type should +/// *not* call this routine, they should instead call 'stable_hash_value'. +template +stable_hash_code stable_hash_combine(const Ts &...args); + +/// Compute a stable_hash_code for a sequence of values. +/// +/// This hashes a sequence of values. It produces the same stable_hash_code as +/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized +/// sequences and is significantly faster given pointers and types which can be +/// hashed as a sequence of bytes. +template +stable_hash_code stable_hash_combine_range(InputIteratorT first, + InputIteratorT last); // All of the implementation details of actually computing the various hash // code values are held within this namespace. These routines are included in @@ -313,7 +436,6 @@ } }; - /// A global, fixed seed-override variable. /// /// This variable can be set using the \see llvm::set_fixed_execution_seed @@ -321,19 +443,28 @@ /// set or read this variable. extern uint64_t fixed_seed_override; -inline uint64_t get_execution_seed() { +template inline uint64_t get_seed(); + +static constexpr uint64_t seed_prime = 0xff51afd7ed558ccdULL; + +template <> inline uint64_t get_seed() { + // If there is a fixed seed override set the first time this is called, use it + // instead of the default seed. + static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime; + return seed; +} + +template <> inline uint64_t get_seed() { // FIXME: This needs to be a per-execution seed. This is just a placeholder // implementation. Switching to a per-execution seed is likely to flush out // instability bugs and so will happen as its own commit. // // However, if there is a fixed seed override set the first time this is // called, return that instead of the per-execution seed. - const uint64_t seed_prime = 0xff51afd7ed558ccdULL; static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime; return seed; } - /// Trait to indicate whether a type's bits can be hashed directly. /// /// A type trait which is true if we want to combine values for hashing by @@ -363,7 +494,7 @@ /// Helper to get the hashable data representation for a type. /// This variant is enabled when the type itself can be used. -template +template std::enable_if_t::value, T> get_hashable_data(const T &value) { return value; @@ -371,12 +502,21 @@ /// Helper to get the hashable data representation for a type. /// This variant is enabled when we must first call hash_value and use the /// result as our data. -template -std::enable_if_t::value, size_t> +template +std::enable_if_t::value, size_t> get_hashable_data(const T &value) { using ::llvm::hash_value; return hash_value(value); } +/// Helper to get the stable hashable data representation for a type. +/// This variant is enabled when we must first call stable_hash_value and use +/// the result as our data. +template +std::enable_if_t::value, uint64_t> +get_hashable_data(const T &value) { + using ::llvm::stable_hash_value; + return stable_hash_value(value); +} /// Helper to store data from a value into a buffer and advance the /// pointer into that buffer. @@ -402,13 +542,13 @@ /// This overload is selected when the value type of the iterator is /// integral. Rather than computing a hash_code for each object and then /// combining them, this (as an optimization) directly combines the integers. -template -hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) { - const uint64_t seed = get_execution_seed(); +template +uint64_t hash_combine_range_impl(InputIteratorT first, InputIteratorT last) { + const uint64_t seed = get_seed(); char buffer[64], *buffer_ptr = buffer; char *const buffer_end = std::end(buffer); while (first != last && store_and_advance(buffer_ptr, buffer_end, - get_hashable_data(*first))) + get_hashable_data(*first))) ++first; if (first == last) return hash_short(buffer, buffer_ptr - buffer, seed); @@ -420,8 +560,9 @@ // Fill up the buffer. We don't clear it, which re-mixes the last round // when only a partial 64-byte chunk is left. buffer_ptr = buffer; - while (first != last && store_and_advance(buffer_ptr, buffer_end, - get_hashable_data(*first))) + while (first != last && + store_and_advance(buffer_ptr, buffer_end, + get_hashable_data(*first))) ++first; // Rotate the buffer if we did a partial fill in order to simulate doing @@ -445,10 +586,10 @@ /// optimization) directly combines the integers. Also, because the integers /// are stored in contiguous memory, this routine avoids copying each value /// and directly reads from the underlying memory. -template -std::enable_if_t::value, hash_code> +template +std::enable_if_t::value, uint64_t> hash_combine_range_impl(ValueT *first, ValueT *last) { - const uint64_t seed = get_execution_seed(); + const uint64_t seed = get_seed(); const char *s_begin = reinterpret_cast(first); const char *s_end = reinterpret_cast(last); const size_t length = std::distance(s_begin, s_end); @@ -471,16 +612,12 @@ } // namespace detail } // namespace hashing - -/// Compute a hash_code for a sequence of values. -/// -/// This hashes a sequence of values. It produces the same hash_code as -/// 'hash_combine(a, b, c, ...)', but can run over arbitrary sized sequences -/// and is significantly faster given pointers and types which can be hashed as -/// a sequence of bytes. +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. template hash_code hash_combine_range(InputIteratorT first, InputIteratorT last) { - return ::llvm::hashing::detail::hash_combine_range_impl(first, last); + return ::llvm::hashing::detail::hash_combine_range_impl( + first, last); } @@ -495,7 +632,7 @@ /// recursive combining of arguments used in hash_combine. It is particularly /// useful at minimizing the code in the recursive calls to ease the pain /// caused by a lack of variadic functions. -struct hash_combine_recursive_helper { +template struct hash_combine_recursive_helper { char buffer[64] = {}; hash_state state; const uint64_t seed; @@ -505,8 +642,7 @@ /// /// This sets up the state for a recursive hash combine, including getting /// the seed and buffer setup. - hash_combine_recursive_helper() - : seed(get_execution_seed()) {} + hash_combine_recursive_helper() : seed(get_seed()) {} /// Combine one chunk of data into the current in-flight hash. /// @@ -553,10 +689,11 @@ /// /// This function recurses through each argument, combining that argument /// into a single hash. - template - hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, - const T &arg, const Ts &...args) { - buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg)); + template + uint64_t combine(size_t length, char *buffer_ptr, char *buffer_end, + const T &arg, const Ts &...args) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, + get_hashable_data(arg)); // Recurse to the next argument. return combine(length, buffer_ptr, buffer_end, args...); @@ -567,7 +704,7 @@ /// The base case when combining arguments recursively is reached when all /// arguments have been handled. It flushes the remaining buffer and /// constructs a hash_code. - hash_code combine(size_t length, char *buffer_ptr, char *buffer_end) { + uint64_t combine(size_t length, char *buffer_ptr, char *buffer_end) { // Check whether the entire set of values fit in the buffer. If so, we'll // use the optimized short hashing routine and skip state entirely. if (length == 0) @@ -590,20 +727,12 @@ } // namespace detail } // namespace hashing -/// Combine values into a single hash_code. -/// -/// This routine accepts a varying number of arguments of any type. It will -/// attempt to combine them into a single hash_code. For user-defined types it -/// attempts to call a \see hash_value overload (via ADL) for the type. For -/// integer and pointer types it directly combines their data into the -/// resulting hash_code. -/// -/// The result is suitable for returning from a user's hash_value -/// *implementation* for their user-defined type. Consumers of a type should -/// *not* call this routine, they should instead call 'hash_value'. +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. template hash_code hash_combine(const Ts &...args) { // Recursively hash each argument using a helper class. - ::llvm::hashing::detail::hash_combine_recursive_helper helper; + ::llvm::hashing::detail::hash_combine_recursive_helper + helper; return helper.combine(0, helper.buffer, helper.buffer + 64, args...); } @@ -617,9 +746,8 @@ /// Overloads for smaller integer types are not provided to ensure consistent /// behavior in the presence of integral promotions. Essentially, /// "hash_value('4')" and "hash_value('0' + 4)" should be the same. -inline hash_code hash_integer_value(uint64_t value) { +inline uint64_t hash_integer_value(uint64_t value, uint64_t seed) { // Similar to hash_4to8_bytes but using a seed instead of length. - const uint64_t seed = get_execution_seed(); const char *s = reinterpret_cast(&value); const uint64_t a = fetch32(s); return hash_16_bytes(seed + (a << 3), fetch32(s + 4)); @@ -633,14 +761,16 @@ template std::enable_if_t::value, hash_code> hash_value(T value) { return ::llvm::hashing::detail::hash_integer_value( - static_cast(value)); + static_cast(value), + ::llvm::hashing::detail::get_seed()); } // Declared and documented above, but defined here so that any of the hashing // infrastructure is available. template hash_code hash_value(const T *ptr) { return ::llvm::hashing::detail::hash_integer_value( - reinterpret_cast(ptr)); + reinterpret_cast(ptr), + ::llvm::hashing::detail::get_seed()); } // Declared and documented above, but defined here so that any of the hashing @@ -677,6 +807,79 @@ return hash_combine_range(arg.begin(), arg.end()); } +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +stable_hash_code stable_hash_combine_range(InputIteratorT first, + InputIteratorT last) { + return ::llvm::hashing::detail::hash_combine_range_impl( + first, last); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +stable_hash_code stable_hash_combine(const Ts &...args) { + // Recursively hash each argument using a helper class. + ::llvm::hashing::detail::hash_combine_recursive_helper + helper; + return helper.combine(0, helper.buffer, helper.buffer + 64, args...); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +std::enable_if_t::value, stable_hash_code> +stable_hash_value(T value) { + return ::llvm::hashing::detail::hash_integer_value( + static_cast(value), + ::llvm::hashing::detail::get_seed()); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template stable_hash_code stable_hash_value(const T *ptr) { + return ::llvm::hashing::detail::hash_integer_value( + reinterpret_cast(ptr), + ::llvm::hashing::detail::get_seed()); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +stable_hash_code stable_hash_value(const std::pair &arg) { + return stable_hash_combine(arg.first, arg.second); +} + +// Implementation details for the stable_hash_value overload for +// std::tuple<...>(...). +namespace hashing { +namespace detail { + +template +stable_hash_code +stable_hash_value_tuple_helper(const std::tuple &arg, + std::index_sequence) { + return stable_hash_combine(std::get(arg)...); +} + +} // namespace detail +} // namespace hashing + +template +stable_hash_code stable_hash_value(const std::tuple &arg) { + // TODO: Use std::apply when LLVM starts using C++17. + return ::llvm::hashing::detail::stable_hash_value_tuple_helper( + arg, typename std::index_sequence_for()); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +stable_hash_code stable_hash_value(const std::basic_string &arg) { + return stable_hash_combine_range(arg.begin(), arg.end()); +} + } // namespace llvm #endif diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -925,6 +925,10 @@ LLVM_NODISCARD hash_code hash_value(StringRef S); + /// Compute a hash_code for a StringRef. + LLVM_NODISCARD + stable_hash_code stable_hash_value(StringRef S); + } // end namespace llvm #endif // LLVM_ADT_STRINGREF_H diff --git a/llvm/include/llvm/CodeGen/MachineStableHash.h b/llvm/include/llvm/CodeGen/MachineStableHash.h --- a/llvm/include/llvm/CodeGen/MachineStableHash.h +++ b/llvm/include/llvm/CodeGen/MachineStableHash.h @@ -20,10 +20,11 @@ class MachineInstr; class MachineOperand; -stable_hash stableHashValue(const MachineOperand &MO); -stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false, - bool HashConstantPoolIndices = false, - bool HashMemOperands = false); +codegen_hashing::stable_hash stableHashValue(const MachineOperand &MO); +codegen_hashing::stable_hash +stableHashValue(const MachineInstr &MI, bool HashVRegs = false, + bool HashConstantPoolIndices = false, + bool HashMemOperands = false); } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/StableHashing.h b/llvm/include/llvm/CodeGen/StableHashing.h --- a/llvm/include/llvm/CodeGen/StableHashing.h +++ b/llvm/include/llvm/CodeGen/StableHashing.h @@ -18,13 +18,13 @@ #include "llvm/ADT/StringRef.h" namespace llvm { +namespace codegen_hashing { /// An opaque object representing a stable hash code. It can be serialized, /// deserialized, and is stable across processes and executions. using stable_hash = uint64_t; // Implementation details -namespace hashing { namespace detail { // Stable hashes are based on the 64-bit FNV-1 hash: @@ -46,31 +46,30 @@ } } // namespace detail -} // namespace hashing inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) { - stable_hash Hash = hashing::detail::FNV_OFFSET_64; - hashing::detail::stable_hash_append(Hash, A); - hashing::detail::stable_hash_append(Hash, B); + stable_hash Hash = detail::FNV_OFFSET_64; + detail::stable_hash_append(Hash, A); + detail::stable_hash_append(Hash, B); return Hash; } inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, stable_hash C) { - stable_hash Hash = hashing::detail::FNV_OFFSET_64; - hashing::detail::stable_hash_append(Hash, A); - hashing::detail::stable_hash_append(Hash, B); - hashing::detail::stable_hash_append(Hash, C); + stable_hash Hash = detail::FNV_OFFSET_64; + detail::stable_hash_append(Hash, A); + detail::stable_hash_append(Hash, B); + detail::stable_hash_append(Hash, C); return Hash; } inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, stable_hash C, stable_hash D) { - stable_hash Hash = hashing::detail::FNV_OFFSET_64; - hashing::detail::stable_hash_append(Hash, A); - hashing::detail::stable_hash_append(Hash, B); - hashing::detail::stable_hash_append(Hash, C); - hashing::detail::stable_hash_append(Hash, D); + stable_hash Hash = detail::FNV_OFFSET_64; + detail::stable_hash_append(Hash, A); + detail::stable_hash_append(Hash, B); + detail::stable_hash_append(Hash, C); + detail::stable_hash_append(Hash, D); return Hash; } @@ -83,16 +82,16 @@ template stable_hash stable_hash_combine_range(InputIteratorT First, InputIteratorT Last) { - stable_hash Hash = hashing::detail::FNV_OFFSET_64; + stable_hash Hash = detail::FNV_OFFSET_64; for (auto I = First; I != Last; ++I) - hashing::detail::stable_hash_append(Hash, *I); + detail::stable_hash_append(Hash, *I); return Hash; } inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) { - stable_hash Hash = hashing::detail::FNV_OFFSET_64; + stable_hash Hash = detail::FNV_OFFSET_64; for (size_t I = 0; I < C; ++I) - hashing::detail::stable_hash_append(Hash, P[I]); + detail::stable_hash_append(Hash, P[I]); return Hash; } @@ -101,12 +100,13 @@ } inline stable_hash stable_hash_combine_string(const char *C) { - stable_hash Hash = hashing::detail::FNV_OFFSET_64; + stable_hash Hash = detail::FNV_OFFSET_64; while (*C) - hashing::detail::stable_hash_append(Hash, *(C++)); + detail::stable_hash_append(Hash, *(C++)); return Hash; } +} // namespace codegen_hashing } // namespace llvm #endif diff --git a/llvm/include/llvm/Support/VersionTuple.h b/llvm/include/llvm/Support/VersionTuple.h --- a/llvm/include/llvm/Support/VersionTuple.h +++ b/llvm/include/llvm/Support/VersionTuple.h @@ -149,6 +149,10 @@ return llvm::hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build); } + friend llvm::stable_hash_code stable_hash_value(const VersionTuple &VT) { + return llvm::stable_hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build); + } + /// Retrieve a string representation of the version number. std::string getAsString() const; diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp --- a/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/llvm/lib/CodeGen/MachineStableHash.cpp @@ -39,6 +39,7 @@ #define DEBUG_TYPE "machine-stable-hash" using namespace llvm; +namespace cgh = ::llvm::codegen_hashing; STATISTIC(StableHashBailingMachineBasicBlock, "Number of encountered unsupported MachineOperands that were " @@ -59,7 +60,7 @@ "Number of encountered unsupported MachineOperands that were " "Metadata of an unsupported kind while computing stable hashes"); -stable_hash llvm::stableHashValue(const MachineOperand &MO) { +cgh::stable_hash llvm::stableHashValue(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_Register: if (Register::isVirtualRegister(MO.getReg())) { @@ -68,16 +69,17 @@ } // Register operands don't have target flags. - return stable_hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), - MO.isDef()); + return cgh::stable_hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), + MO.isDef()); case MachineOperand::MO_Immediate: - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + return cgh::stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getImm()); case MachineOperand::MO_CImmediate: case MachineOperand::MO_FPImmediate: { auto Val = MO.isCImm() ? MO.getCImm()->getValue() : MO.getFPImm()->getValueAPF().bitcastToAPInt(); auto ValHash = - stable_hash_combine_array(Val.getRawData(), Val.getNumWords()); + cgh::stable_hash_combine_array(Val.getRawData(), Val.getNumWords()); return hash_combine(MO.getType(), MO.getTargetFlags(), ValHash); } @@ -98,51 +100,52 @@ return 0; case MachineOperand::MO_TargetIndex: { if (const char *Name = MO.getTargetIndexName()) - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), - stable_hash_combine_string(Name), - MO.getOffset()); + return cgh::stable_hash_combine(MO.getType(), MO.getTargetFlags(), + cgh::stable_hash_combine_string(Name), + MO.getOffset()); StableHashBailingTargetIndexNoName++; return 0; } case MachineOperand::MO_FrameIndex: case MachineOperand::MO_JumpTableIndex: - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), - MO.getIndex()); + return cgh::stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getIndex()); case MachineOperand::MO_ExternalSymbol: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), - stable_hash_combine_string(MO.getSymbolName())); + cgh::stable_hash_combine_string(MO.getSymbolName())); case MachineOperand::MO_RegisterMask: case MachineOperand::MO_RegisterLiveOut: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); case MachineOperand::MO_ShuffleMask: { - std::vector ShuffleMaskHashes; + std::vector ShuffleMaskHashes; llvm::transform( MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes), - [](int S) -> llvm::stable_hash { return llvm::stable_hash(S); }); + [](int S) -> cgh::stable_hash { return cgh::stable_hash(S); }); - return hash_combine(MO.getType(), MO.getTargetFlags(), - stable_hash_combine_array(ShuffleMaskHashes.data(), - ShuffleMaskHashes.size())); + return hash_combine( + MO.getType(), MO.getTargetFlags(), + cgh::stable_hash_combine_array(ShuffleMaskHashes.data(), + ShuffleMaskHashes.size())); } case MachineOperand::MO_MCSymbol: { auto SymbolName = MO.getMCSymbol()->getName(); return hash_combine(MO.getType(), MO.getTargetFlags(), - stable_hash_combine_string(SymbolName)); + cgh::stable_hash_combine_string(SymbolName)); } case MachineOperand::MO_CFIIndex: - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), - MO.getCFIIndex()); + return cgh::stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getCFIIndex()); case MachineOperand::MO_IntrinsicID: - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), - MO.getIntrinsicID()); + return cgh::stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getIntrinsicID()); case MachineOperand::MO_Predicate: - return stable_hash_combine(MO.getType(), MO.getTargetFlags(), - MO.getPredicate()); + return cgh::stable_hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getPredicate()); } llvm_unreachable("Invalid machine operand type"); } @@ -151,11 +154,11 @@ /// Returns 0 if no stable hash could be computed. /// The hashing and equality testing functions ignore definitions so this is /// useful for CSE, etc. -stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, - bool HashConstantPoolIndices, - bool HashMemOperands) { +cgh::stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, + bool HashConstantPoolIndices, + bool HashMemOperands) { // Build up a buffer of hash code components. - SmallVector HashComponents; + SmallVector HashComponents; HashComponents.reserve(MI.getNumOperands() + MI.getNumMemOperands() + 2); HashComponents.push_back(MI.getOpcode()); HashComponents.push_back(MI.getFlags()); @@ -165,12 +168,12 @@ continue; // Skip virtual register defs. if (MO.isCPI()) { - HashComponents.push_back(stable_hash_combine( + HashComponents.push_back(cgh::stable_hash_combine( MO.getType(), MO.getTargetFlags(), MO.getIndex())); continue; } - stable_hash StableHash = stableHashValue(MO); + cgh::stable_hash StableHash = stableHashValue(MO); if (!StableHash) return 0; HashComponents.push_back(StableHash); @@ -189,6 +192,6 @@ HashComponents.push_back(static_cast(Op->getFailureOrdering())); } - return stable_hash_combine_range(HashComponents.begin(), - HashComponents.end()); + return cgh::stable_hash_combine_range(HashComponents.begin(), + HashComponents.end()); } diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -168,7 +168,10 @@ /// /// does not cause MDOperand to be transparent. In particular, a bare pointer /// doesn't get hashed before it's combined, whereas \a MDOperand would. -static const Metadata *get_hashable_data(const MDOperand &X) { return X.get(); } +template +static const Metadata *get_hashable_data(const MDOperand &X) { + return X.get(); +} } // end namespace llvm diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -600,3 +600,7 @@ hash_code llvm::hash_value(StringRef S) { return hash_combine_range(S.begin(), S.end()); } + +stable_hash_code llvm::stable_hash_value(StringRef S) { + return stable_hash_combine_range(S.begin(), S.end()); +} diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt --- a/llvm/unittests/ADT/CMakeLists.txt +++ b/llvm/unittests/ADT/CMakeLists.txt @@ -67,6 +67,7 @@ SparseBitVectorTest.cpp SparseMultiSetTest.cpp SparseSetTest.cpp + StableHashingTest.cpp StatisticTest.cpp StringExtrasTest.cpp StringMapTest.cpp diff --git a/llvm/unittests/ADT/StableHashingTest.cpp b/llvm/unittests/ADT/StableHashingTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/ADT/StableHashingTest.cpp @@ -0,0 +1,433 @@ +//===- llvm/unittest/ADT/HashingTest.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Hashing.h unit tests for `stable_hash_code`. +// +// This file is an adaptation of `HashingTest.cpp`. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/DataTypes.h" +#include "gtest/gtest.h" +#include +#include +#include +#include + +namespace llvm { + +// Helper for test code to print hash codes. +void PrintTo(const stable_hash_code &code, std::ostream *os) { + *os << static_cast(code); +} + +// Fake an object that is recognized as hashable data to test super large +// objects. +struct LargeTestInteger { + uint64_t arr[8]; +}; + +struct NonPOD { + uint64_t x, y; + NonPOD(uint64_t x, uint64_t y) : x(x), y(y) {} + friend stable_hash_code stable_hash_value(const NonPOD &obj) { + return stable_hash_combine(obj.x, obj.y); + } +}; + +namespace hashing { +namespace detail { +template <> struct is_hashable_data : std::true_type {}; +} // namespace detail +} // namespace hashing + +} // namespace llvm + +using namespace llvm; + +namespace { + +enum TestEnumeration { TE_Foo = 42, TE_Bar = 43 }; + +TEST(StableHashingTest, HashValueBasicTest) { + int x = 42, y = 43, c = 'x'; + void *p = nullptr; + uint64_t i = 71; + const unsigned ci = 71; + volatile int vi = 71; + const volatile int cvi = 71; + uintptr_t addr = reinterpret_cast(&y); + EXPECT_EQ(stable_hash_value(42), stable_hash_value(x)); + EXPECT_EQ(stable_hash_value(42), stable_hash_value(TE_Foo)); + EXPECT_NE(stable_hash_value(42), stable_hash_value(y)); + EXPECT_NE(stable_hash_value(42), stable_hash_value(TE_Bar)); + EXPECT_NE(stable_hash_value(42), stable_hash_value(p)); + EXPECT_EQ(stable_hash_value(71), stable_hash_value(i)); + EXPECT_EQ(stable_hash_value(71), stable_hash_value(ci)); + EXPECT_EQ(stable_hash_value(71), stable_hash_value(vi)); + EXPECT_EQ(stable_hash_value(71), stable_hash_value(cvi)); + EXPECT_EQ(stable_hash_value(c), stable_hash_value('x')); + EXPECT_EQ(stable_hash_value('4'), stable_hash_value('0' + 4)); + EXPECT_EQ(stable_hash_value(addr), stable_hash_value(&y)); +} + +TEST(StableHashingTest, HashValueStdPair) { + EXPECT_EQ(stable_hash_combine(42, 43), + stable_hash_value(std::make_pair(42, 43))); + EXPECT_NE(stable_hash_combine(43, 42), + stable_hash_value(std::make_pair(42, 43))); + EXPECT_NE(stable_hash_combine(42, 43), + stable_hash_value(std::make_pair(42ull, 43ull))); + EXPECT_NE(stable_hash_combine(42, 43), + stable_hash_value(std::make_pair(42, 43ull))); + EXPECT_NE(stable_hash_combine(42, 43), + stable_hash_value(std::make_pair(42ull, 43))); + + // Note that pairs are implicitly flattened to a direct sequence of data and + // hashed efficiently as a consequence. + EXPECT_EQ(stable_hash_combine(42, 43, 44), + stable_hash_value(std::make_pair(42, std::make_pair(43, 44)))); + EXPECT_EQ(stable_hash_value(std::make_pair(42, std::make_pair(43, 44))), + stable_hash_value(std::make_pair(std::make_pair(42, 43), 44))); + + // Ensure that pairs which have padding bytes *inside* them don't get treated + // this way. + EXPECT_EQ(stable_hash_combine('0', stable_hash_combine(1ull, '2')), + stable_hash_value(std::make_pair('0', std::make_pair(1ull, '2')))); + + // Ensure that non-POD pairs don't explode the traits used. + NonPOD obj1(1, 2), obj2(3, 4), obj3(5, 6); + EXPECT_EQ( + stable_hash_combine(obj1, stable_hash_combine(obj2, obj3)), + stable_hash_value(std::make_pair(obj1, std::make_pair(obj2, obj3)))); +} + +TEST(StableHashingTest, HashValueStdTuple) { + EXPECT_EQ(stable_hash_combine(), stable_hash_value(std::make_tuple())); + EXPECT_EQ(stable_hash_combine(42), stable_hash_value(std::make_tuple(42))); + EXPECT_EQ(stable_hash_combine(42, 'c'), + stable_hash_value(std::make_tuple(42, 'c'))); + + EXPECT_NE(stable_hash_combine(43, 42), + stable_hash_value(std::make_tuple(42, 43))); + EXPECT_NE(stable_hash_combine(42, 43), + stable_hash_value(std::make_tuple(42ull, 43ull))); + EXPECT_NE(stable_hash_combine(42, 43), + stable_hash_value(std::make_tuple(42, 43ull))); + EXPECT_NE(stable_hash_combine(42, 43), + stable_hash_value(std::make_tuple(42ull, 43))); +} + +TEST(StableHashingTest, HashValueStdString) { + std::string s = "Hello World!"; + EXPECT_EQ(stable_hash_combine_range(s.c_str(), s.c_str() + s.size()), + stable_hash_value(s)); + EXPECT_EQ(stable_hash_combine_range(s.c_str(), s.c_str() + s.size() - 1), + stable_hash_value(s.substr(0, s.size() - 1))); + EXPECT_EQ(stable_hash_combine_range(s.c_str() + 1, s.c_str() + s.size() - 1), + stable_hash_value(s.substr(1, s.size() - 2))); + + std::wstring ws = L"Hello Wide World!"; + EXPECT_EQ(stable_hash_combine_range(ws.c_str(), ws.c_str() + ws.size()), + stable_hash_value(ws)); + EXPECT_EQ(stable_hash_combine_range(ws.c_str(), ws.c_str() + ws.size() - 1), + stable_hash_value(ws.substr(0, ws.size() - 1))); + EXPECT_EQ( + stable_hash_combine_range(ws.c_str() + 1, ws.c_str() + ws.size() - 1), + stable_hash_value(ws.substr(1, ws.size() - 2))); +} + +template T *begin(T (&arr)[N]) { return arr; } +template T *end(T (&arr)[N]) { return arr + N; } + +// Provide a dummy, hashable type designed for easy verification: its hash is +// the same as its value. +struct HashableDummy { + uint64_t value; +}; +stable_hash_code stable_hash_value(HashableDummy dummy) { return dummy.value; } + +TEST(StableHashingTest, HashCombineRangeBasicTest) { + // Leave this uninitialized in the hope that valgrind will catch bad reads. + int dummy; + stable_hash_code dummy_hash = stable_hash_combine_range(&dummy, &dummy); + EXPECT_NE(stable_hash_code(0), dummy_hash); + + const int arr1[] = {1, 2, 3}; + stable_hash_code arr1_hash = + stable_hash_combine_range(begin(arr1), end(arr1)); + EXPECT_NE(dummy_hash, arr1_hash); + EXPECT_EQ(arr1_hash, stable_hash_combine_range(begin(arr1), end(arr1))); + + const std::vector vec(begin(arr1), end(arr1)); + EXPECT_EQ(arr1_hash, stable_hash_combine_range(vec.begin(), vec.end())); + + const std::list list(begin(arr1), end(arr1)); + EXPECT_EQ(arr1_hash, stable_hash_combine_range(list.begin(), list.end())); + + const std::deque deque(begin(arr1), end(arr1)); + EXPECT_EQ(arr1_hash, stable_hash_combine_range(deque.begin(), deque.end())); + + const int arr2[] = {3, 2, 1}; + stable_hash_code arr2_hash = + stable_hash_combine_range(begin(arr2), end(arr2)); + EXPECT_NE(dummy_hash, arr2_hash); + EXPECT_NE(arr1_hash, arr2_hash); + + const int arr3[] = {1, 1, 2, 3}; + stable_hash_code arr3_hash = + stable_hash_combine_range(begin(arr3), end(arr3)); + EXPECT_NE(dummy_hash, arr3_hash); + EXPECT_NE(arr1_hash, arr3_hash); + + const int arr4[] = {1, 2, 3, 3}; + stable_hash_code arr4_hash = + stable_hash_combine_range(begin(arr4), end(arr4)); + EXPECT_NE(dummy_hash, arr4_hash); + EXPECT_NE(arr1_hash, arr4_hash); + + const size_t arr5[] = {1, 2, 3}; + const HashableDummy d_arr5[] = {{1}, {2}, {3}}; + stable_hash_code arr5_hash = + stable_hash_combine_range(begin(arr5), end(arr5)); + stable_hash_code d_arr5_hash = + stable_hash_combine_range(begin(d_arr5), end(d_arr5)); + EXPECT_EQ(arr5_hash, d_arr5_hash); +} + +TEST(StableHashingTest, HashCombineRangeLengthDiff) { + // Test that as only the length varies, we compute different hash codes for + // sequences. + std::map code_to_size; + std::vector all_one_c(256, '\xff'); + for (unsigned Idx = 1, Size = all_one_c.size(); Idx < Size; ++Idx) { + stable_hash_code code = + stable_hash_combine_range(&all_one_c[0], &all_one_c[0] + Idx); + std::map::iterator I = + code_to_size.insert(std::make_pair(code, Idx)).first; + EXPECT_EQ(Idx, I->second); + } + code_to_size.clear(); + std::vector all_zero_c(256, '\0'); + for (unsigned Idx = 1, Size = all_zero_c.size(); Idx < Size; ++Idx) { + stable_hash_code code = + stable_hash_combine_range(&all_zero_c[0], &all_zero_c[0] + Idx); + std::map::iterator I = + code_to_size.insert(std::make_pair(code, Idx)).first; + EXPECT_EQ(Idx, I->second); + } + code_to_size.clear(); + std::vector all_one_int(512, -1); + for (unsigned Idx = 1, Size = all_one_int.size(); Idx < Size; ++Idx) { + stable_hash_code code = + stable_hash_combine_range(&all_one_int[0], &all_one_int[0] + Idx); + std::map::iterator I = + code_to_size.insert(std::make_pair(code, Idx)).first; + EXPECT_EQ(Idx, I->second); + } + code_to_size.clear(); + std::vector all_zero_int(512, 0); + for (unsigned Idx = 1, Size = all_zero_int.size(); Idx < Size; ++Idx) { + stable_hash_code code = + stable_hash_combine_range(&all_zero_int[0], &all_zero_int[0] + Idx); + std::map::iterator I = + code_to_size.insert(std::make_pair(code, Idx)).first; + EXPECT_EQ(Idx, I->second); + } +} + +TEST(StableHashingTest, HashCombineRangeGoldenTest) { + struct { + const char *s; + uint64_t hash; + } golden_data[] = { + // clang-format off + { "a", 0xaeb6f9d5517c61f8ULL }, + { "ab", 0x7ab1edb96be496b4ULL }, + { "abc", 0xe38e60bf19c71a3fULL }, + { "abcde", 0xd24461a66de97f6eULL }, + { "abcdefgh", 0x4ef872ec411dec9dULL }, + { "abcdefghijklm", 0xe8a865539f4eadfeULL }, + { "abcdefghijklmnopqrstu", 0x261cdf85faaf4e79ULL }, + { "abcdefghijklmnopqrstuvwxyzabcdef", 0x43ba70e4198e3b2aULL }, + { "abcdefghijklmnopqrstuvwxyzabcdef" + "abcdefghijklmnopqrstuvwxyzghijkl" + "abcdefghijklmnopqrstuvwxyzmnopqr" + "abcdefghijklmnopqrstuvwxyzstuvwx" + "abcdefghijklmnopqrstuvwxyzyzabcd", 0xdcd57fb2afdf72beULL }, + { "a", 0xaeb6f9d5517c61f8ULL }, + { "aa", 0xf2b3b69a9736a1ebULL }, + { "aaa", 0xf752eb6f07b1cafeULL }, + { "aaaaa", 0x812bd21e1236954cULL }, + { "aaaaaaaa", 0xff07a2cff08ac587ULL }, + { "aaaaaaaaaaaaa", 0x84ac949d54d704ecULL }, + { "aaaaaaaaaaaaaaaaaaaaa", 0xcb2c8fb6be8f5648ULL }, + { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xcc40ab7f164091b6ULL }, + { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xc58e174c1e78ffe9ULL }, + { "z", 0x1ba160d7e8f8785cULL }, + { "zz", 0x2c5c03172f1285d7ULL }, + { "zzz", 0x9d2c4f4b507a2ac3ULL }, + { "zzzzz", 0x0f03b9031735693aULL }, + { "zzzzzzzz", 0xe674147c8582c08eULL }, + { "zzzzzzzzzzzzz", 0x3162d9fa6938db83ULL }, + { "zzzzzzzzzzzzzzzzzzzzz", 0x37b9a549e013620cULL }, + { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x8921470aff885016ULL }, + { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" + "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0xf60fdcd9beb08441ULL }, + { "a", 0xaeb6f9d5517c61f8ULL }, + { "ab", 0x7ab1edb96be496b4ULL }, + { "aba", 0x3edb049950884d0aULL }, + { "ababa", 0x8f2de9e73a97714bULL }, + { "abababab", 0xee14a29ddf0ce54cULL }, + { "ababababababa", 0x38b3ddaada2d52b4ULL }, + { "ababababababababababa", 0xd3665364219f2b85ULL }, + { "abababababababababababababababab", 0xa75cd6afbf1bc972ULL }, + { "abababababababababababababababab" + "abababababababababababababababab" + "abababababababababababababababab" + "abababababababababababababababab" + "abababababababababababababababab", 0x840192d129f7a22bULL } + // clang-format on + }; + for (unsigned i = 0; i < sizeof(golden_data) / sizeof(*golden_data); ++i) { + StringRef str = golden_data[i].s; + stable_hash_code hash = stable_hash_combine_range(str.begin(), str.end()); +#if 0 // Enable this to generate paste-able text for the above structure. + std::string member_str = "\"" + str.str() + "\","; + fprintf(stderr, " { %-35s 0x%016llxULL },\n", + member_str.c_str(), static_cast(hash)); +#endif + EXPECT_EQ(golden_data[i].hash, static_cast(hash)); + } +} + +TEST(StableHashingTest, HashCombineBasicTest) { + // Hashing a sequence of homogenous types matches range hashing. + const int i1 = 42, i2 = 43, i3 = 123, i4 = 999, i5 = 0, i6 = 79; + const int arr1[] = {i1, i2, i3, i4, i5, i6}; + EXPECT_EQ(stable_hash_combine_range(arr1, arr1 + 1), stable_hash_combine(i1)); + EXPECT_EQ(stable_hash_combine_range(arr1, arr1 + 2), + stable_hash_combine(i1, i2)); + EXPECT_EQ(stable_hash_combine_range(arr1, arr1 + 3), + stable_hash_combine(i1, i2, i3)); + EXPECT_EQ(stable_hash_combine_range(arr1, arr1 + 4), + stable_hash_combine(i1, i2, i3, i4)); + EXPECT_EQ(stable_hash_combine_range(arr1, arr1 + 5), + stable_hash_combine(i1, i2, i3, i4, i5)); + EXPECT_EQ(stable_hash_combine_range(arr1, arr1 + 6), + stable_hash_combine(i1, i2, i3, i4, i5, i6)); + + // Hashing a sequence of heterogeneous types which *happen* to all produce the + // same data for hashing produces the same as a range-based hash of the + // fundamental values. + const size_t s1 = 1024, s2 = 8888, s3 = 9000000; + const HashableDummy d1 = {1024}, d2 = {8888}, d3 = {9000000}; + const size_t arr2[] = {s1, s2, s3}; + EXPECT_EQ(stable_hash_combine_range(begin(arr2), end(arr2)), + stable_hash_combine(s1, s2, s3)); + EXPECT_EQ(stable_hash_combine(s1, s2, s3), stable_hash_combine(s1, s2, d3)); + EXPECT_EQ(stable_hash_combine(s1, s2, s3), stable_hash_combine(s1, d2, s3)); + EXPECT_EQ(stable_hash_combine(s1, s2, s3), stable_hash_combine(d1, s2, s3)); + EXPECT_EQ(stable_hash_combine(s1, s2, s3), stable_hash_combine(d1, d2, s3)); + EXPECT_EQ(stable_hash_combine(s1, s2, s3), stable_hash_combine(d1, d2, d3)); + + // Permuting values causes hashes to change. + EXPECT_NE(stable_hash_combine(i1, i1, i1), stable_hash_combine(i1, i1, i2)); + EXPECT_NE(stable_hash_combine(i1, i1, i1), stable_hash_combine(i1, i2, i1)); + EXPECT_NE(stable_hash_combine(i1, i1, i1), stable_hash_combine(i2, i1, i1)); + EXPECT_NE(stable_hash_combine(i1, i1, i1), stable_hash_combine(i2, i2, i1)); + EXPECT_NE(stable_hash_combine(i1, i1, i1), stable_hash_combine(i2, i2, i2)); + EXPECT_NE(stable_hash_combine(i2, i1, i1), stable_hash_combine(i1, i1, i2)); + EXPECT_NE(stable_hash_combine(i1, i1, i2), stable_hash_combine(i1, i2, i1)); + EXPECT_NE(stable_hash_combine(i1, i2, i1), stable_hash_combine(i2, i1, i1)); + + // Changing type w/o changing value causes hashes to change. + EXPECT_NE(stable_hash_combine(i1, i2, i3), + stable_hash_combine((char)i1, i2, i3)); + EXPECT_NE(stable_hash_combine(i1, i2, i3), + stable_hash_combine(i1, (char)i2, i3)); + EXPECT_NE(stable_hash_combine(i1, i2, i3), + stable_hash_combine(i1, i2, (char)i3)); + + // This is array of uint64, but it should have the exact same byte pattern as + // an array of LargeTestIntegers. + const uint64_t bigarr[] = { + 0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL, + 0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, 0xafafafafededededULL, + 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL, 0xaaaaaaaaababababULL, + 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL, 0xdeadbeafdeadbeefULL, + 0xfefefefededededeULL, 0xafafafafededededULL, 0xffffeeeeddddccccULL, + 0xaaaacbcbffffababULL, 0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, + 0xccddeeffeeddccbbULL, 0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, + 0xafafafafededededULL, 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL}; + // Hash a preposterously large integer, both aligned with the buffer and + // misaligned. + const LargeTestInteger li = {{0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, + 0xccddeeffeeddccbbULL, 0xdeadbeafdeadbeefULL, + 0xfefefefededededeULL, 0xafafafafededededULL, + 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL}}; + // Rotate the storage from 'li'. + const LargeTestInteger l2 = {{0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL, + 0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, + 0xafafafafededededULL, 0xffffeeeeddddccccULL, + 0xaaaacbcbffffababULL, 0xaaaaaaaaababababULL}}; + const LargeTestInteger l3 = {{0xccddeeffeeddccbbULL, 0xdeadbeafdeadbeefULL, + 0xfefefefededededeULL, 0xafafafafededededULL, + 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL, + 0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL}}; + EXPECT_EQ(stable_hash_combine_range(begin(bigarr), end(bigarr)), + stable_hash_combine(li, li, li)); + EXPECT_EQ(stable_hash_combine_range(bigarr, bigarr + 9), + stable_hash_combine(bigarr[0], l2)); + EXPECT_EQ(stable_hash_combine_range(bigarr, bigarr + 10), + stable_hash_combine(bigarr[0], bigarr[1], l3)); + EXPECT_EQ(stable_hash_combine_range(bigarr, bigarr + 17), + stable_hash_combine(li, bigarr[0], l2)); + EXPECT_EQ(stable_hash_combine_range(bigarr, bigarr + 18), + stable_hash_combine(li, bigarr[0], bigarr[1], l3)); + EXPECT_EQ(stable_hash_combine_range(bigarr, bigarr + 18), + stable_hash_combine(bigarr[0], l2, bigarr[9], l3)); + EXPECT_EQ(stable_hash_combine_range(bigarr, bigarr + 20), + stable_hash_combine(bigarr[0], l2, bigarr[9], l3, bigarr[18], + bigarr[19])); +} + +TEST(StableHashingTest, HashCombineArgs18) { + // This tests that we can pass in up to 18 args. +#define CHECK_SAME(...) \ + EXPECT_EQ(stable_hash_combine(__VA_ARGS__), stable_hash_combine(__VA_ARGS__)) + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8, 9); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7, 8); + CHECK_SAME(1, 2, 3, 4, 5, 6, 7); + CHECK_SAME(1, 2, 3, 4, 5, 6); + CHECK_SAME(1, 2, 3, 4, 5); + CHECK_SAME(1, 2, 3, 4); + CHECK_SAME(1, 2, 3); + CHECK_SAME(1, 2); + CHECK_SAME(1); +#undef CHECK_SAME +} + +} // namespace