diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -23,8 +23,18 @@ using IRHash = uint64_t; -IRHash StructuralHash(const Function &F); -IRHash StructuralHash(const Module &M); +/// Returns a hash of the function \p F. +/// \param F The function to hash. +/// \param DetailedHash Whether or not to encode additional information in the +/// hash. The additional information added into the hash when this flag is set +/// to true includes instruction and operand type information. +IRHash StructuralHash(const Function &F, bool DetailedHash = false); + +/// Returns a hash of the module \p M by hashing all functions and global +/// variables contained within. \param M The module to hash. \param DetailedHash +/// Whether or not to encode additional information in the function hashes that +/// composed the module hash. +IRHash StructuralHash(const Module &M, bool DetailedHash = false); } // end namespace llvm diff --git a/llvm/lib/IR/Pass.cpp b/llvm/lib/IR/Pass.cpp --- a/llvm/lib/IR/Pass.cpp +++ b/llvm/lib/IR/Pass.cpp @@ -139,9 +139,13 @@ #endif #ifdef EXPENSIVE_CHECKS -uint64_t Pass::structuralHash(Module &M) const { return StructuralHash(M); } +uint64_t Pass::structuralHash(Module &M) const { + return StructuralHash(M, true); +} -uint64_t Pass::structuralHash(Function &F) const { return StructuralHash(F); } +uint64_t Pass::structuralHash(Function &F) const { + return StructuralHash(F, true); +} #endif //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -9,6 +9,9 @@ #include "llvm/IR/StructuralHash.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" using namespace llvm; @@ -43,7 +46,7 @@ // expensive checks for pass modification status). When modifying this // function, most changes should be gated behind an option and enabled // selectively. - void update(const Function &F) { + void update(const Function &F, bool DetailedHash) { // Declarations don't affect analyses. if (F.isDeclaration()) return; @@ -68,8 +71,26 @@ // opcodes into BBs wouldn't affect the hash, only the order of the // opcodes hash(45798); - for (auto &Inst : *BB) + for (auto &Inst : *BB) { hash(Inst.getOpcode()); + if (DetailedHash) { + hash(Inst.getType()); + + // Handle additional properties of specific instructions that cause + // semantic differences in the IR. + if (const CmpInst *ComparisonInstruction = dyn_cast(&Inst)) + hash(ComparisonInstruction->getPredicate()); + if (const IntrinsicInst *InstrinsicInstruction = + dyn_cast(&Inst)) + hash(InstrinsicInstruction->getIntrinsicID()); + if (const CallInst *CallInstruction = dyn_cast(&Inst)) + hash(CallInstruction->getCalledFunction()->getName()); + + for (unsigned I = 0; I < Inst.getNumOperands(); ++I) { + hash(Inst.getOperand(I)->getType()); + } + } + } const Instruction *Term = BB->getTerminator(); for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { @@ -90,11 +111,11 @@ hash(GV.getValueType()->getTypeID()); } - void update(const Module &M) { + void update(const Module &M, bool DetailedHash) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) - update(F); + update(F, DetailedHash); } uint64_t getHash() const { return Hash; } @@ -102,14 +123,14 @@ } // namespace -IRHash llvm::StructuralHash(const Function &F) { +IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { StructuralHashImpl H; - H.update(F); + H.update(F, DetailedHash); return H.getHash(); } -IRHash llvm::StructuralHash(const Module &M) { +IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { StructuralHashImpl H; - H.update(M); + H.update(M, DetailedHash); return H.getHash(); } diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp --- a/llvm/unittests/IR/StructuralHashTest.cpp +++ b/llvm/unittests/IR/StructuralHashTest.cpp @@ -91,8 +91,8 @@ LLVMContext Ctx; std::unique_ptr M1 = parseIR(Ctx, "define void @f() { ret void }"); std::unique_ptr M2 = parseIR(Ctx, "define i32 @f() { ret i32 0 }"); - // FIXME: should be different EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } TEST(StructuralHashTest, InstructionOpCode) { @@ -109,7 +109,7 @@ EXPECT_NE(StructuralHash(*M1), StructuralHash(*M2)); } -TEST(StructuralHashTest, InstructionType) { +TEST(StructuralHashTest, InstructionSubType) { LLVMContext Ctx; std::unique_ptr M1 = parseIR(Ctx, "define void @f(ptr %p) {\n" " %a = load i32, ptr %p\n" @@ -119,8 +119,22 @@ " %a = load i64, ptr %p\n" " ret void\n" "}\n"); - // FIXME: should be different EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} + +TEST(StructuralHashTest, InstructionType) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define void @f(ptr %p) {\n" + " %1 = load i32, ptr %p\n" + " ret void\n" + "}\n"); + std::unique_ptr M2 = parseIR(Ctx, "define void @f(ptr %p) {\n" + " %1 = load float, ptr %p\n" + " ret void\n" + "}\n"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } TEST(StructuralHashTest, IgnoredMetadata) { @@ -138,6 +152,53 @@ !0 = !{} !1 = !{ptr @llvm.embedded.object, !".llvm.lto"} )"); + // clang-format on EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); } + +TEST(StructuralHashTest, ComparisonInstructionPredicate) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i1 @f(i64 %a, i64 %b) {\n" + " %1 = icmp eq i64 %a, %b\n" + " ret i1 %1\n" + "}\n"); + std::unique_ptr M2 = parseIR(Ctx, "define i1 @f(i64 %a, i64 %b) {\n" + " %1 = icmp ne i64 %a, %b\n" + " ret i1 %1\n" + "}\n"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} + +TEST(StructuralHashTest, IntrinsicInstruction) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define float @f(float %a) {\n" + " %b = call float @llvm.sin.f32(float %a)\n" + " ret float %b\n" + "}\n" + "declare float @llvm.sin.f32(float)\n"); + std::unique_ptr M2 = parseIR(Ctx, "define float @f(float %a) {\n" + " %b = call float @llvm.cos.f32(float %a)\n" + " ret float %b\n" + "}\n" + "declare float @llvm.cos.f32(float)\n"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} + +TEST(StructuralHashTest, CallInstruction) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n" + " %b = call i64 @f1(i64 %a)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f1(i64)"); + std::unique_ptr M2 = parseIR(Ctx, "define i64 @f(i64 %a) {\n" + " %b = call i64 @f2(i64 %a)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f2(i64)"); + EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); + EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); +} } // end anonymous namespace