diff --git a/bolt/include/bolt/Core/HashUtilities.h b/bolt/include/bolt/Core/HashUtilities.h --- a/bolt/include/bolt/Core/HashUtilities.h +++ b/bolt/include/bolt/Core/HashUtilities.h @@ -20,8 +20,6 @@ namespace llvm { namespace bolt { -uint16_t hash_64_to_16(const uint64_t Hash); - std::string hashInteger(uint64_t Value); std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol); @@ -35,6 +33,8 @@ std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB, OperandHashFuncTy OperandHashFunc); +std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB); + } // namespace bolt } // namespace llvm diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -497,6 +497,11 @@ return 0; } + virtual unsigned getShortOpcode(unsigned Opcode) const { + llvm_unreachable("not implemented"); + return 0; + } + /// Create increment contents of target by 1 for Instrumentation virtual InstructionListType createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, diff --git a/bolt/lib/Core/HashUtilities.cpp b/bolt/lib/Core/HashUtilities.cpp --- a/bolt/lib/Core/HashUtilities.cpp +++ b/bolt/lib/Core/HashUtilities.cpp @@ -130,5 +130,51 @@ return HashString; } +/// A "loose" hash of a basic block to use with the stale profile matching. The +/// computed value will be the same for blocks with minor changes (such as +/// reordering of instructions or using different operands) but may result in +/// collisions that need to be resolved by a stronger hashing. +std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) { + const bool IsX86 = BC.isX86(); + + // The hash is computed by creating a string of all lexicographically ordered + // instruction opcodes, which is then hashed with std::hash. + std::set Opcodes; + for (const MCInst &Inst : BB) { + if (BC.MIB->isPseudo(Inst)) + continue; + + // Ignore unconditional jumps, as they can be added / removed as a result + // of basic block reordering. + if (BC.MIB->isUnconditionalBranch(Inst)) + continue; + + // Do not distinguish different types of conditional jumps. + if (BC.MIB->isConditionalBranch(Inst)) { + Opcodes.insert("JMP"); + continue; + } + + unsigned Opcode = Inst.getOpcode(); + if (IsX86) + Opcode = BC.MIB->getShortOpcode(Opcode); + + if (Opcode > 0) { + std::string Mnemonic = BC.InstPrinter->getMnemonic(&Inst).first; + Mnemonic.erase( + std::find_if(Mnemonic.rbegin(), Mnemonic.rend(), + [](unsigned char ch) { return !std::isspace(ch); }) + .base(), + Mnemonic.end()); + Opcodes.insert(Mnemonic); + } + } + + std::string HashString; + for (const std::string &Opcode : Opcodes) + HashString.append(Opcode); + return HashString; +} + } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp --- a/bolt/lib/Profile/StaleProfileMatching.cpp +++ b/bolt/lib/Profile/StaleProfileMatching.cpp @@ -73,64 +73,39 @@ cl::opt StaleMatchingCostBlockInc( "stale-matching-cost-block-inc", - cl::desc("The cost of increasing a block's count by one."), cl::init(110), + cl::desc("The cost of increasing a block count by one."), cl::init(150), cl::ReallyHidden, cl::cat(BoltOptCategory)); cl::opt StaleMatchingCostBlockDec( "stale-matching-cost-block-dec", - cl::desc("The cost of decreasing a block's count by one."), cl::init(100), + cl::desc("The cost of decreasing a block count by one."), cl::init(150), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostBlockEntryInc( - "stale-matching-cost-block-entry-inc", - cl::desc("The cost of increasing the entry block's count by one."), - cl::init(110), cl::ReallyHidden, cl::cat(BoltOptCategory)); - -cl::opt StaleMatchingCostBlockEntryDec( - "stale-matching-cost-block-entry-dec", - cl::desc("The cost of decreasing the entry block's count by one."), - cl::init(100), cl::ReallyHidden, cl::cat(BoltOptCategory)); - -cl::opt StaleMatchingCostBlockZeroInc( - "stale-matching-cost-block-zero-inc", - cl::desc("The cost of increasing a count of zero-weight block by one."), - cl::init(10), cl::Hidden, cl::cat(BoltOptCategory)); - -cl::opt StaleMatchingCostBlockUnknownInc( - "stale-matching-cost-block-unknown-inc", - cl::desc("The cost of increasing an unknown block's count by one."), - cl::init(10), cl::ReallyHidden, cl::cat(BoltOptCategory)); - cl::opt StaleMatchingCostJumpInc( "stale-matching-cost-jump-inc", - cl::desc("The cost of increasing a jump's count by one."), cl::init(100), + cl::desc("The cost of increasing a jump count by one."), cl::init(150), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostJumpFTInc( - "stale-matching-cost-jump-ft-inc", - cl::desc("The cost of increasing a fall-through jump's count by one."), - cl::init(100), cl::ReallyHidden, cl::cat(BoltOptCategory)); - cl::opt StaleMatchingCostJumpDec( "stale-matching-cost-jump-dec", - cl::desc("The cost of decreasing a jump's count by one."), cl::init(110), + cl::desc("The cost of decreasing a jump count by one."), cl::init(150), cl::ReallyHidden, cl::cat(BoltOptCategory)); -cl::opt StaleMatchingCostJumpFTDec( - "stale-matching-cost-jump-ft-dec", - cl::desc("The cost of decreasing a fall-through jump's count by one."), - cl::init(110), cl::ReallyHidden, cl::cat(BoltOptCategory)); +cl::opt StaleMatchingCostBlockUnknownInc( + "stale-matching-cost-block-unknown-inc", + cl::desc("The cost of increasing an unknown block count by one."), + cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory)); cl::opt StaleMatchingCostJumpUnknownInc( "stale-matching-cost-jump-unknown-inc", - cl::desc("The cost of increasing an unknown jump's count by one."), - cl::init(50), cl::ReallyHidden, cl::cat(BoltOptCategory)); + cl::desc("The cost of increasing an unknown jump count by one."), + cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory)); cl::opt StaleMatchingCostJumpUnknownFTInc( "stale-matching-cost-jump-unknown-ft-inc", cl::desc( - "The cost of increasing an unknown fall-through jump's count by one."), - cl::init(5), cl::ReallyHidden, cl::cat(BoltOptCategory)); + "The cost of increasing an unknown fall-through jump count by one."), + cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory)); } // namespace opts @@ -145,7 +120,8 @@ using ValueOffset = Bitfield::Element; using ValueOpcode = Bitfield::Element; using ValueInstr = Bitfield::Element; - using ValueNeighbor = Bitfield::Element; + using ValuePred = Bitfield::Element; + using ValueSucc = Bitfield::Element; public: explicit BlendedBlockHash() {} @@ -154,7 +130,8 @@ Offset = Bitfield::get(Hash); OpcodeHash = Bitfield::get(Hash); InstrHash = Bitfield::get(Hash); - NeighborHash = Bitfield::get(Hash); + PredHash = Bitfield::get(Hash); + SuccHash = Bitfield::get(Hash); } /// Combine the blended hash into uint64_t. @@ -163,7 +140,8 @@ Bitfield::set(Hash, Offset); Bitfield::set(Hash, OpcodeHash); Bitfield::set(Hash, InstrHash); - Bitfield::set(Hash, NeighborHash); + Bitfield::set(Hash, PredHash); + Bitfield::set(Hash, SuccHash); return Hash; } @@ -175,7 +153,8 @@ "incorrect blended hash distance computation"); uint64_t Dist = 0; // Account for NeighborHash - Dist += NeighborHash == BBH.NeighborHash ? 0 : 1; + Dist += SuccHash == BBH.SuccHash ? 0 : 1; + Dist += PredHash == BBH.PredHash ? 0 : 1; Dist <<= 16; // Account for InstrHash Dist += InstrHash == BBH.InstrHash ? 0 : 1; @@ -192,9 +171,10 @@ /// (Strong) Hash of the basic block instructions, including opcodes and /// operands. uint16_t InstrHash{0}; - /// Hash of the (loose) basic block together with (loose) hashes of its - /// successors and predecessors. - uint16_t NeighborHash{0}; + /// (Loose) Hashes of the predecessors of the basic block. + uint8_t PredHash{0}; + /// (Loose) Hashes of the successors of the basic block. + uint8_t SuccHash{0}; }; /// The object is used to identify and match basic blocks in a BinaryFunction @@ -252,41 +232,43 @@ std::vector BlendedHashes(BasicBlocks.size()); std::vector OpcodeHashes(BasicBlocks.size()); - // Initialize hash components + // Initialize hash components. for (size_t I = 0; I < BasicBlocks.size(); I++) { const BinaryBasicBlock *BB = BasicBlocks[I]; assert(BB->getIndex() == I && "incorrect block index"); BlendedHashes[I].Offset = BB->getOffset(); - // Hashing complete instructions + // Hashing complete instructions. std::string InstrHashStr = hashBlock( BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); }); uint64_t InstrHash = std::hash{}(InstrHashStr); - BlendedHashes[I].InstrHash = hash_64_to_16(InstrHash); - // Hashing opcodes - std::string OpcodeHashStr = - hashBlock(BC, *BB, [](const MCOperand &Op) { return std::string(); }); + BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash); + // Hashing opcodes. + std::string OpcodeHashStr = hashBlockLoose(BC, *BB); OpcodeHashes[I] = std::hash{}(OpcodeHashStr); - BlendedHashes[I].OpcodeHash = hash_64_to_16(OpcodeHashes[I]); + BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]); } - // Initialize neighbor hash + // Initialize neighbor hash. for (size_t I = 0; I < BasicBlocks.size(); I++) { const BinaryBasicBlock *BB = BasicBlocks[I]; - uint64_t Hash = OpcodeHashes[I]; - // Append hashes of successors + // Append hashes of successors. + uint64_t Hash = 0; for (BinaryBasicBlock *SuccBB : BB->successors()) { uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()]; Hash = hashing::detail::hash_16_bytes(Hash, SuccHash); } - // Append hashes of predecessors + BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash); + + // Append hashes of predecessors. + Hash = 0; for (BinaryBasicBlock *PredBB : BB->predecessors()) { uint64_t PredHash = OpcodeHashes[PredBB->getIndex()]; Hash = hashing::detail::hash_16_bytes(Hash, PredHash); } - BlendedHashes[I].NeighborHash = hash_64_to_16(Hash); + BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash); } - // Assign hashes + // Assign hashes. for (size_t I = 0; I < BasicBlocks.size(); I++) { const BinaryBasicBlock *BB = BasicBlocks[I]; BB->setHash(BlendedHashes[I].combine()); @@ -575,16 +557,15 @@ Params.JoinIslands = opts::StaleMatchingJoinIslands; Params.CostBlockInc = opts::StaleMatchingCostBlockInc; + Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc; Params.CostBlockDec = opts::StaleMatchingCostBlockDec; - Params.CostBlockEntryInc = opts::StaleMatchingCostBlockEntryInc; - Params.CostBlockEntryDec = opts::StaleMatchingCostBlockEntryDec; - Params.CostBlockZeroInc = opts::StaleMatchingCostBlockZeroInc; + Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec; Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc; Params.CostJumpInc = opts::StaleMatchingCostJumpInc; - Params.CostJumpFTInc = opts::StaleMatchingCostJumpFTInc; + Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc; Params.CostJumpDec = opts::StaleMatchingCostJumpDec; - Params.CostJumpFTDec = opts::StaleMatchingCostJumpFTDec; + Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec; Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc; Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc; diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -2871,6 +2871,10 @@ } } + unsigned getShortOpcode(unsigned Opcode) const override { + return X86::getOpcodeForShortImmediateForm(Opcode); + } + MCPhysReg getIntArgRegister(unsigned ArgNo) const override { // FIXME: this should depend on the calling convention. switch (ArgNo) { diff --git a/bolt/test/X86/Inputs/blarge_profile_stale.yaml b/bolt/test/X86/Inputs/blarge_profile_stale.yaml --- a/bolt/test/X86/Inputs/blarge_profile_stale.yaml +++ b/bolt/test/X86/Inputs/blarge_profile_stale.yaml @@ -37,15 +37,15 @@ blocks: - bid: 0 insns: 4 - hash: 0xb1e5b76571270000 + hash: 0x1111111111111111 exec: 20 succ: [ { bid: 1, cnt: 0 } ] - bid: 1 insns: 9 - hash: 0x587e93788b970010 + hash: 0x27e43a5e10cd0010 succ: [ { bid: 3, cnt: 320, mis: 171 }, { bid: 2, cnt: 0 } ] - bid: 3 insns: 2 - hash: 0x20e605d745e50039 + hash: 0x4db935b6471e0039 succ: [ { bid: 1, cnt: 300, mis: 33 }, { bid: 4, cnt: 20 } ] ... diff --git a/bolt/test/X86/reader-stale-yaml.test b/bolt/test/X86/reader-stale-yaml.test --- a/bolt/test/X86/reader-stale-yaml.test +++ b/bolt/test/X86/reader-stale-yaml.test @@ -37,3 +37,4 @@ # Check the overal inference stats. CHECK: 2 out of 7 functions in the binary (28.6%) have non-empty execution profile CHECK: inferred profile for 1 (50.00% of profiled, 100.00% of stale) functions responsible for 87.31% samples (640 out of 733) +CHECK: inference found an exact match for 66.67% of basic blocks (2 out of 3 stale)