diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -23,8 +23,18 @@ using IRHash = uint64_t; -IRHash StructuralHash(const Function &F); -IRHash StructuralHash(const Module &M); +/// Returns a hash of the function \p F. +/// \param F The function to hash. +/// \param DetailedHash Whether or not to encode additional information in the +/// hash. The additional information added into the hash when this flag is set +/// to true includes instruction and operand type information. +IRHash StructuralHash(const Function &F, bool DetailedHash = false); + +/// Returns a hash of the module \p M by hashing all functions and global +/// variables contained within. \param M The module to hash. \param DetailedHash +/// Whether or not to encode additional information in the function hashes that +/// composed the module hash. +IRHash StructuralHash(const Module &M, bool DetailedHash = false); } // end namespace llvm diff --git a/llvm/lib/IR/Pass.cpp b/llvm/lib/IR/Pass.cpp --- a/llvm/lib/IR/Pass.cpp +++ b/llvm/lib/IR/Pass.cpp @@ -139,9 +139,15 @@ #endif #ifdef EXPENSIVE_CHECKS -uint64_t Pass::structuralHash(Module &M) const { return StructuralHash(M); } +// TODO: Use detailed structural hashing once exposed bugs have been fixed/ +// (https://github.com/llvm/llvm-project/issues/64938) +uint64_t Pass::structuralHash(Module &M) const { + return StructuralHash(M, false); +} -uint64_t Pass::structuralHash(Function &F) const { return StructuralHash(F); } +uint64_t Pass::structuralHash(Function &F) const { + return StructuralHash(F, false); +} #endif //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -7,8 +7,12 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/StructuralHash.h" +#include "llvm/ADT/Hashing.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" using namespace llvm; @@ -24,9 +28,59 @@ void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } + // This will produce different values on 32-bit and 64-bit systens as + // hash_combine returns a size_t. However, this is only used for + // detailed hashing which, in-tree, only needs to distinguish between + // differences in functions. + template void hashArbitaryType(const T &V) { + hash(hash_combine(V)); + } + + void hashType(Type *ValueType) { + hash(ValueType->getTypeID()); + if (ValueType->isIntegerTy()) + hash(ValueType->getIntegerBitWidth()); + } + public: StructuralHashImpl() : Hash(4) {} + void updateOperand(Value *Operand) { + hashType(Operand->getType()); + + // The cases enumerated below are not exhaustive and are only aimed to + // get decent coverage over the function. + if (ConstantInt *ConstInt = dyn_cast(Operand)) { + hashArbitaryType(ConstInt->getValue()); + } else if (ConstantFP *ConstFP = dyn_cast(Operand)) { + hashArbitaryType(ConstFP->getValue()); + } else if (Argument *Arg = dyn_cast(Operand)) { + hash(Arg->getArgNo()); + } else if (Function *Func = dyn_cast(Operand)) { + // Hashing the name will be deterministic as LLVM's hashing infrastructure + // has explicit support for hashing strings and will not simply hash + // the pointer. + hashArbitaryType(Func->getName()); + } + } + + void updateInstruction(const Instruction &Inst, bool DetailedHash) { + hash(Inst.getOpcode()); + + if (!DetailedHash) + return; + + hashType(Inst.getType()); + + // Handle additional properties of specific instructions that cause + // semantic differences in the IR. + if (const auto *ComparisonInstruction = dyn_cast(&Inst)) + hash(ComparisonInstruction->getPredicate()); + + for (const auto &Op : Inst.operands()) + updateOperand(Op); + } + // A function hash is calculated by considering only the number of arguments // and whether a function is varargs, the order of basic blocks (given by the // successors of each basic block in depth first order), and the order of @@ -43,7 +97,7 @@ // expensive checks for pass modification status). When modifying this // function, most changes should be gated behind an option and enabled // selectively. - void update(const Function &F) { + void update(const Function &F, bool DetailedHash) { // Declarations don't affect analyses. if (F.isDeclaration()) return; @@ -69,7 +123,7 @@ // opcodes hash(45798); for (auto &Inst : *BB) - hash(Inst.getOpcode()); + updateInstruction(Inst, DetailedHash); const Instruction *Term = BB->getTerminator(); for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { @@ -90,11 +144,11 @@ hash(GV.getValueType()->getTypeID()); } - void update(const Module &M) { + void update(const Module &M, bool DetailedHash) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) - update(F); + update(F, DetailedHash); } uint64_t getHash() const { return Hash; } @@ -102,14 +156,14 @@ } // namespace -IRHash llvm::StructuralHash(const Function &F) { +IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { StructuralHashImpl H; - H.update(F); + H.update(F, DetailedHash); return H.getHash(); } -IRHash llvm::StructuralHash(const Module &M) { +IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { StructuralHashImpl H; - H.update(M); + H.update(M, DetailedHash); return H.getHash(); } diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp --- a/llvm/unittests/IR/StructuralHashTest.cpp +++ b/llvm/unittests/IR/StructuralHashTest.cpp @@ -91,8 +91,8 @@ LLVMContext Ctx; std::unique_ptr M1 = parseIR(Ctx, "define void @f() { ret void }"); std::unique_ptr M2 = parseIR(Ctx, "define i32 @f() { ret i32 0 }"); - // FIXME: should be different EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } TEST(StructuralHashTest, InstructionOpCode) { @@ -109,7 +109,7 @@ EXPECT_NE(StructuralHash(*M1), StructuralHash(*M2)); } -TEST(StructuralHashTest, InstructionType) { +TEST(StructuralHashTest, InstructionSubType) { LLVMContext Ctx; std::unique_ptr M1 = parseIR(Ctx, "define void @f(ptr %p) {\n" " %a = load i32, ptr %p\n" @@ -119,8 +119,22 @@ " %a = load i64, ptr %p\n" " ret void\n" "}\n"); - // FIXME: should be different EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} + +TEST(StructuralHashTest, InstructionType) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define void @f(ptr %p) {\n" + " %1 = load i32, ptr %p\n" + " ret void\n" + "}\n"); + std::unique_ptr M2 = parseIR(Ctx, "define void @f(ptr %p) {\n" + " %1 = load float, ptr %p\n" + " ret void\n" + "}\n"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } TEST(StructuralHashTest, IgnoredMetadata) { @@ -138,6 +152,91 @@ !0 = !{} !1 = !{ptr @llvm.embedded.object, !".llvm.lto"} )"); + // clang-format on + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); +} + +TEST(StructuralHashTest, ComparisonInstructionPredicate) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i1 @f(i64 %a, i64 %b) {\n" + " %1 = icmp eq i64 %a, %b\n" + " ret i1 %1\n" + "}\n"); + std::unique_ptr M2 = parseIR(Ctx, "define i1 @f(i64 %a, i64 %b) {\n" + " %1 = icmp ne i64 %a, %b\n" + " ret i1 %1\n" + "}\n"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} + +TEST(StructuralHashTest, IntrinsicInstruction) { + LLVMContext Ctx; + std::unique_ptr M1 = + parseIR(Ctx, "define float @f(float %a) {\n" + " %b = call float @llvm.sin.f32(float %a)\n" + " ret float %b\n" + "}\n" + "declare float @llvm.sin.f32(float)\n"); + std::unique_ptr M2 = + parseIR(Ctx, "define float @f(float %a) {\n" + " %b = call float @llvm.cos.f32(float %a)\n" + " ret float %b\n" + "}\n" + "declare float @llvm.cos.f32(float)\n"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} + +TEST(StructuralHashTest, CallInstruction) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n" + " %b = call i64 @f1(i64 %a)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f1(i64)"); + std::unique_ptr M2 = parseIR(Ctx, "define i64 @f(i64 %a) {\n" + " %b = call i64 @f2(i64 %a)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f2(i64)"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} + +TEST(StructuralHashTest, ConstantInteger) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i64 @f1() {\n" + " ret i64 1\n" + "}\n"); + std::unique_ptr M2 = parseIR(Ctx, "define i64 @f2() {\n" + " ret i64 2\n" + "}\n"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} + +TEST(StructuralHashTest, BigConstantInteger) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i128 @f1() {\n" + " ret i128 18446744073709551616\n" + "}\n"); + std::unique_ptr M2 = parseIR(Ctx, "define i128 @f2() {\n" + " ret i128 18446744073709551617\n" + "}\n"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} + +TEST(StructuralHashTest, ArgumentNumber) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i64 @f1(i64 %a, i64 %b) {\n" + " ret i64 %a\n" + "}\n"); + std::unique_ptr M2 = parseIR(Ctx, "define i64 @f2(i64 %a, i64 %b) {\n" + " ret i64 %b\n" + "}\n"); EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } } // end anonymous namespace