diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -388,6 +388,16 @@
   /// been adjusted by the profile inference algorithm.
   bool HasInferredProfile{false};
 
+  /// Inference stats:
+  /// the total number of basic blocks in the profile
+  uint32_t NumStaleBlocks{0};
+  /// the number of matched basic blocks
+  uint32_t NumMatchedBlocks{0};
+  /// the total count of samples in the profile
+  uint64_t StaleSampleCount{0};
+  /// the count of matched samples
+  uint64_t MatchedSampleCount{0};
+
   /// For functions with mismatched profile we store all call profile
   /// information at a function level (as opposed to tying it to
   /// specific call sites).
@@ -1572,6 +1582,22 @@
 
   void setHasInferredProfile(bool Inferred) { HasInferredProfile = Inferred; }
 
+  uint32_t getNumStaleBlocks() const { return NumStaleBlocks; }
+
+  void setNumStaleBlocks(uint32_t Count) { NumStaleBlocks = Count; }
+
+  uint32_t getNumMatchedBlocks() const { return NumMatchedBlocks; }
+
+  void setNumMatchedBlocks(uint32_t Count) { NumMatchedBlocks = Count; }
+
+  uint64_t getStaleSampleCount() const { return StaleSampleCount; }
+
+  void setStaleSampleCount(uint64_t Count) { StaleSampleCount = Count; }
+
+  uint64_t getMatchedSampleCount() const { return MatchedSampleCount; }
+
+  void setMatchedSampleCount(uint64_t Count) { MatchedSampleCount = Count; }
+
   void addCFIInstruction(uint64_t Offset, MCCFIInstruction &&Inst) {
     assert(!Instructions.empty());
 
diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp
--- a/bolt/lib/Passes/BinaryPasses.cpp
+++ b/bolt/lib/Passes/BinaryPasses.cpp
@@ -1454,6 +1454,31 @@
                      100.0 * NumInferredFunctions / NumAllStaleFunctions,
                      100.0 * InferredSampleCount / TotalSampleCount,
                      InferredSampleCount, TotalSampleCount);
+    // Sum the per-function matching stats over all functions whose profile
+    // was inferred.
+    uint32_t TotalNumStaleBlocks = 0;
+    uint32_t TotalNumMatchedBlocks = 0;
+    uint64_t TotalStaleSampleCount = 0;
+    uint64_t TotalMatchedSampleCount = 0;
+    for (const BinaryFunction *BF : ProfiledFunctions) {
+      if (!BF->hasInferredProfile())
+        continue;
+      TotalNumStaleBlocks += BF->getNumStaleBlocks();
+      TotalNumMatchedBlocks += BF->getNumMatchedBlocks();
+      TotalStaleSampleCount += BF->getStaleSampleCount();
+      TotalMatchedSampleCount += BF->getMatchedSampleCount();
+    }
+    // The counters are uint32_t/uint64_t, which a "%zu" conversion does not
+    // match; stream them directly and use format() only for the percentages.
+    outs() << "BOLT-INFO: stale inference matched "
+           << format("%.2f%%",
+                     100.0 * TotalNumMatchedBlocks / TotalNumStaleBlocks)
+           << " of basic blocks (" << TotalNumMatchedBlocks << " out of "
+           << TotalNumStaleBlocks << " stale) responsible for "
+           << format("%.2f%%", 100.0 * TotalMatchedSampleCount /
+                                   TotalStaleSampleCount)
+           << " samples (" << TotalMatchedSampleCount << " out of "
+           << TotalStaleSampleCount << " stale)\n";
   }
 
   if (const uint64_t NumUnusedObjects = BC.getNumUnusedProfiledObjects()) {
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -236,9 +236,8 @@
   /// Find the most similar block for a given hash.
   const FlowBlock *matchBlock(BlendedBlockHash BlendedHash) const {
     auto BlockIt = OpHashToBlocks.find(BlendedHash.OpcodeHash);
-    if (BlockIt == OpHashToBlocks.end()) {
+    if (BlockIt == OpHashToBlocks.end())
       return nullptr;
-    }
     FlowBlock *BestBlock = nullptr;
     uint64_t BestDist = std::numeric_limits<uint64_t>::max();
     for (auto It : BlockIt->second) {
@@ -393,7 +392,8 @@
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-void matchWeightsByHashes(const BinaryFunction::BasicBlockOrderType &BlockOrder,
+void matchWeightsByHashes(BinaryFunction &BF,
+                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
                           FlowFunction &Func) {
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
@@ -475,12 +475,30 @@
   // Assign block counts based on in-/out- jumps
   for (FlowBlock &Block : Func.Blocks) {
     if (OutWeight[Block.Index] == 0 && InWeight[Block.Index] == 0) {
-      assert(Block.HasUnknownWeight && "unmatched block with positive count");
+      assert(Block.HasUnknownWeight && "unmatched block with a positive count");
       continue;
     }
     Block.HasUnknownWeight = false;
     Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
   }
+
+  // Tally matched vs. total profile blocks/samples and record them on BF.
+  uint32_t TotalBlocks = 0;
+  uint32_t HitBlocks = 0;
+  uint64_t TotalSamples = 0;
+  uint64_t HitSamples = 0;
+  for (const yaml::bolt::BinaryBasicBlockProfile &ProfBB : YamlBF.Blocks) {
+    ++TotalBlocks;
+    TotalSamples += ProfBB.ExecCount;
+    if (MatchedBlocks.find(ProfBB.Index) == MatchedBlocks.end())
+      continue;
+    ++HitBlocks;
+    HitSamples += ProfBB.ExecCount;
+  }
+  BF.setNumStaleBlocks(TotalBlocks);
+  BF.setNumMatchedBlocks(HitBlocks);
+  BF.setStaleSampleCount(TotalSamples);
+  BF.setMatchedSampleCount(HitSamples);
 }
 
 /// The function finds all blocks that are (i) reachable from the Entry block
@@ -702,7 +720,7 @@
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BlockOrder, YamlBF, Func);
+  matchWeightsByHashes(BF, BlockOrder, YamlBF, Func);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned