diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -638,9 +638,22 @@
   /// Total hotness score according to profiling data for this binary.
   uint64_t TotalScore{0};
 
-  /// Binary-wide stats for macro-fusion.
-  uint64_t MissedMacroFusionPairs{0};
-  uint64_t MissedMacroFusionExecCount{0};
+  /// Binary-wide aggregated stats.
+  struct BinaryStats {
+    /// Stats for macro-fusion.
+    uint64_t MissedMacroFusionPairs{0};
+    uint64_t MissedMacroFusionExecCount{0};
+
+    /// Stats for stale profile matching.
+    /// Number of profile basic blocks processed by stale matching.
+    uint32_t NumStaleBlocks{0};
+    /// Number of those profile blocks matched with high confidence.
+    uint32_t NumMatchedBlocks{0};
+    /// Sum of the execution counts of all processed profile blocks.
+    uint64_t StaleSampleCount{0};
+    /// Sum of the execution counts of the high-confidence matched blocks.
+    uint64_t MatchedSampleCount{0};
+  } Stats;
 
   // Address of the first allocated segment.
   uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -381,7 +381,7 @@
   /// Profile match ratio.
   float ProfileMatchRatio{0.0f};
 
-  /// Raw branch count for this function in the profile
+  /// Raw branch count for this function in the profile.
   uint64_t RawBranchCount{0};
 
   /// Indicates the type of profile the function is using.
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -2221,8 +2221,8 @@
                       << Twine::utohexstr(getAddress() + Offset)
                       << " in function " << *this << "; executed "
                       << BB.getKnownExecutionCount() << " times.\n");
-    ++BC.MissedMacroFusionPairs;
-    BC.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
+    ++BC.Stats.MissedMacroFusionPairs;
+    BC.Stats.MissedMacroFusionExecCount += BB.getKnownExecutionCount();
   }
 }
 
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1454,6 +1454,19 @@
                      100.0 * NumInferredFunctions / NumAllStaleFunctions,
                      100.0 * InferredSampleCount / TotalSampleCount,
                      InferredSampleCount, TotalSampleCount);
+    // Stream the counters instead of formatting them with "%zu": they are
+    // uint32_t/uint64_t rather than size_t, so "%zu" would be a type mismatch.
+    outs() << format("BOLT-INFO: inference found an exact match for %.2f%% of "
+                     "basic blocks",
+                     100.0 * BC.Stats.NumMatchedBlocks /
+                         BC.Stats.NumStaleBlocks)
+           << " (" << BC.Stats.NumMatchedBlocks << " out of "
+           << BC.Stats.NumStaleBlocks << " stale) responsible for "
+           << format("%.2f%% samples",
+                     100.0 * BC.Stats.MatchedSampleCount /
+                         BC.Stats.StaleSampleCount)
+           << " (" << BC.Stats.MatchedSampleCount << " out of "
+           << BC.Stats.StaleSampleCount << " stale)\n";
   }
 
   if (const uint64_t NumUnusedObjects = BC.getNumUnusedProfiledObjects()) {
@@ -1562,10 +1575,11 @@
   }
 
   // Print information on missed macro-fusion opportunities seen on input.
-  if (BC.MissedMacroFusionPairs) {
-    outs() << "BOLT-INFO: the input contains " << BC.MissedMacroFusionPairs
-           << " (dynamic count : " << BC.MissedMacroFusionExecCount
-           << ") opportunities for macro-fusion optimization";
+  if (BC.Stats.MissedMacroFusionPairs) {
+    outs() << "BOLT-INFO: the input contains "
+           << BC.Stats.MissedMacroFusionPairs
+           << " (dynamic count : " << BC.Stats.MissedMacroFusionExecCount
+           << ") opportunities for macro-fusion optimization";
     switch (opts::AlignMacroOpFusion) {
     case MFT_NONE:
       outs() << ". Use -align-macro-fusion to fix.\n";
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -236,14 +236,11 @@
   /// Find the most similar block for a given hash.
   const FlowBlock *matchBlock(BlendedBlockHash BlendedHash) const {
     auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
-    if (BlockIt == OpHashToBlocks.end()) {
+    if (BlockIt == OpHashToBlocks.end())
       return nullptr;
-    }
     FlowBlock *BestBlock = nullptr;
     uint64_t BestDist = std::numeric_limits<uint64_t>::max();
-    for (auto It : BlockIt->second) {
-      FlowBlock *Block = It.second;
-      BlendedBlockHash Hash = It.first;
+    for (const auto &[Hash, Block] : BlockIt->second) {
       uint64_t Dist = Hash.distance(BlendedHash);
       if (BestBlock == nullptr || Dist < BestDist) {
         BestDist = Dist;
@@ -253,6 +250,14 @@
     return BestBlock;
   }
 
+  /// Returns true if the two basic blocks (in the binary and in the profile)
+  /// corresponding to the given hashes are matched to each other with a high
+  /// confidence.
+  static bool isHighConfidenceMatch(BlendedBlockHash Hash1,
+                                    BlendedBlockHash Hash2) {
+    return Hash1.InstrHash == Hash2.InstrHash;
+  }
+
 private:
   using HashBlockPairType = std::pair<BlendedBlockHash, FlowBlock *>;
   std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
@@ -393,7 +398,8 @@
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-void matchWeightsByHashes(const BinaryFunction::BasicBlockOrderType &BlockOrder,
+void matchWeightsByHashes(BinaryContext &BC,
+                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
                           FlowFunction &Func) {
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
@@ -417,19 +423,29 @@
   // Match blocks from the profile to the blocks in CFG
   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
     assert(YamlBB.Hash != 0 && "empty hash of BinaryBasicBlockProfile");
-    BlendedBlockHash BlendedHash(YamlBB.Hash);
-    const FlowBlock *MatchedBlock = Matcher.matchBlock(BlendedHash);
+    BlendedBlockHash YamlHash(YamlBB.Hash);
+    const FlowBlock *MatchedBlock = Matcher.matchBlock(YamlHash);
     if (MatchedBlock != nullptr) {
       MatchedBlocks[YamlBB.Index] = MatchedBlock;
       LLVM_DEBUG(dbgs() << "Matched yaml block with bid = " << YamlBB.Index
                         << " and hash = " << Twine::utohexstr(YamlBB.Hash)
                         << " to BB with index = " << MatchedBlock->Index - 1
                         << "\n");
+      // Count the block as matched only when the match is high-confidence.
+      BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
+      if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
+        ++BC.Stats.NumMatchedBlocks;
+        BC.Stats.MatchedSampleCount += YamlBB.ExecCount;
+      }
     } else {
       LLVM_DEBUG(
           dbgs() << "Couldn't match yaml block with bid = " << YamlBB.Index
                  << " and hash = " << Twine::utohexstr(YamlBB.Hash) << "\n");
     }
+
+    // Every profile block counts towards the totals, matched or not.
+    ++BC.Stats.NumStaleBlocks;
+    BC.Stats.StaleSampleCount += YamlBB.ExecCount;
   }
 
   // Match jumps from the profile to the jumps from CFG
@@ -475,7 +491,7 @@
   // Assign block counts based on in-/out- jumps
   for (FlowBlock &Block : Func.Blocks) {
     if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
-      assert(Block.HasUnknownWeight && "unmatched block with positive count");
+      assert(Block.HasUnknownWeight && "unmatched block with a positive count");
       continue;
     }
     Block.HasUnknownWeight = false;
@@ -702,7 +718,7 @@
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BlockOrder, YamlBF, Func);
+  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned