diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -21,8 +21,10 @@ class Function; class Module; -uint64_t StructuralHash(const Function &F); -uint64_t StructuralHash(const Module &M); +using IRHash = uint64_t; + +IRHash StructuralHash(const Function &F); +IRHash StructuralHash(const Module &M); } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h --- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h @@ -99,11 +99,6 @@ /// Test whether the two functions have equivalent behaviour. int compare(); - /// Hash a function. Equivalent functions will have the same hash, and unequal - /// functions will have different hashes with high probability. - using FunctionHash = uint64_t; - static FunctionHash functionHash(Function &); - protected: /// Start the comparison. void beginCompare() { diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -27,12 +27,28 @@ public: StructuralHashImpl() : Hash(4) {} + // A function hash is calculated by considering only the number of arguments + // and whether a function is varargs, the order of basic blocks (given by the + // successors of each basic block in depth first order), and the order of + // opcodes of each instruction within each of these basic blocks. This mirrors + // the strategy FunctionComparator::compare() uses to compare functions by + // walking the BBs in depth first order and comparing each instruction in + // sequence. Because this hash currently does not look at the operands, it is + // insensitive to things such as the target of calls and the constants used in + // the function, which makes it useful when possibly merging functions which + // are the same modulo constants and call targets. + // + // Note that different users of StructuralHash will want different behavior + // out of it (i.e., MergeFunctions will want something different from PM + // expensive checks for pass modification status). When modifying this + // function, most changes should be gated behind an option and enabled + // selectively. void update(const Function &F) { // Declarations don't affect analyses. if (F.isDeclaration()) return; - hash(12345); // Function header + hash(0x6acaa36bef8325c5ULL); // Function header hash(F.isVarArg()); hash(F.arg_size()); @@ -40,11 +56,18 @@ SmallVector BBs; SmallPtrSet VisitedBBs; + // Walk the blocks in the same order as + // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the + // function "structure." (BB and opcode sequence) BBs.push_back(&F.getEntryBlock()); VisitedBBs.insert(BBs[0]); while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); - hash(45798); // Block header + + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the + // opcodes + hash(45798); for (auto &Inst : *BB) hash(Inst.getOpcode()); @@ -79,13 +102,13 @@ } // namespace -uint64_t llvm::StructuralHash(const Function &F) { +IRHash llvm::StructuralHash(const Function &F) { StructuralHashImpl H; H.update(F); return H.getHash(); } -uint64_t llvm::StructuralHash(const Module &M) { +IRHash llvm::StructuralHash(const Module &M) { StructuralHashImpl H; H.update(M); return H.getHash(); diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -107,6 +107,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/StructuralHash.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" @@ -171,15 +172,14 @@ class FunctionNode { mutable AssertingVH F; - FunctionComparator::FunctionHash Hash; + IRHash Hash; public: // Note the hash is recalculated potentially multiple times, but it is cheap. - FunctionNode(Function *F) - : F(F), Hash(FunctionComparator::functionHash(*F)) {} + FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {} Function *getFunc() const { return F; } - FunctionComparator::FunctionHash getHash() const { return Hash; } + IRHash getHash() const { return Hash; } /// Replace the reference to the function F by the function G, assuming their /// implementations are equal. @@ -390,11 +390,10 @@ // All functions in the module, ordered by hash. Functions with a unique // hash value are easily eliminated. - std::vector> - HashedFuncs; + std::vector> HashedFuncs; for (Function &Func : M) { if (isEligibleForMerging(Func)) { - HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func}); + HashedFuncs.push_back({StructuralHash(Func), &Func}); } } diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -958,67 +958,3 @@ } return 0; } - -namespace { - -// Accumulate the hash of a sequence of 64-bit integers. This is similar to a -// hash of a sequence of 64bit ints, but the entire input does not need to be -// available at once. This interface is necessary for functionHash because it -// needs to accumulate the hash as the structure of the function is traversed -// without saving these values to an intermediate buffer. This form of hashing -// is not often needed, as usually the object to hash is just read from a -// buffer. -class HashAccumulator64 { - uint64_t Hash; - -public: - // Initialize to random constant, so the state isn't zero. - HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } - - void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } - - // No finishing is required, because the entire hash value is used. - uint64_t getHash() { return Hash; } -}; - -} // end anonymous namespace - -// A function hash is calculated by considering only the number of arguments and -// whether a function is varargs, the order of basic blocks (given by the -// successors of each basic block in depth first order), and the order of -// opcodes of each instruction within each of these basic blocks. This mirrors -// the strategy compare() uses to compare functions by walking the BBs in depth -// first order and comparing each instruction in sequence. Because this hash -// does not look at the operands, it is insensitive to things such as the -// target of calls and the constants used in the function, which makes it useful -// when possibly merging functions which are the same modulo constants and call -// targets. -FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { - HashAccumulator64 H; - H.add(F.isVarArg()); - H.add(F.arg_size()); - - SmallVector BBs; - SmallPtrSet VisitedBBs; - - // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), - // accumulating the hash of the function "structure." (BB and opcode sequence) - BBs.push_back(&F.getEntryBlock()); - VisitedBBs.insert(BBs[0]); - while (!BBs.empty()) { - const BasicBlock *BB = BBs.pop_back_val(); - // This random value acts as a block header, as otherwise the partition of - // opcodes into BBs wouldn't affect the hash, only the order of the opcodes - H.add(45798); - for (const auto &Inst : *BB) { - H.add(Inst.getOpcode()); - } - const Instruction *Term = BB->getTerminator(); - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(Term->getSuccessor(i)).second) - continue; - BBs.push_back(Term->getSuccessor(i)); - } - } - return H.getHash(); -}