diff --git a/compiler-rt/test/profile/instrprof-block-coverage.c b/compiler-rt/test/profile/instrprof-block-coverage.c
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-block-coverage.c
@@ -0,0 +1,47 @@
+// RUN: %clang_pgogen -mllvm -pgo-block-coverage %s -o %t.out
+// RUN: env LLVM_PROFILE_FILE=%t1.profraw %run %t.out 1
+// RUN: env LLVM_PROFILE_FILE=%t2.profraw %run %t.out 2
+// RUN: llvm-profdata merge -o %t.profdata %t1.profraw %t2.profraw
+// RUN: %clang_profuse=%t.profdata -mllvm -pgo-verify-bfi -o - -S -emit-llvm %s 2>%t.errs | FileCheck %s --implicit-check-not="!prof"
+// RUN: FileCheck %s < %t.errs --allow-empty --check-prefix=CHECK-ERROR
+
+#include <stdlib.h>
+
+// CHECK: @foo({{.*}})
+// CHECK-SAME: !prof ![[PROF0:[0-9]+]]
+void foo(int a) {
+  // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PROF1:[0-9]+]]
+  if (a % 2 == 0) {
+    //
+  } else {
+    //
+  }
+
+  // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PROF1]]
+  for (int i = 1; i < a; i++) {
+    // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PROF2:[0-9]+]]
+    if (a % 3 == 0) {
+      //
+    } else {
+      // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PROF2]]
+      if (a % 1001 == 0) {
+        return;
+      }
+    }
+  }
+
+  return;
+}
+
+// CHECK: @main({{.*}})
+// CHECK-SAME: !prof ![[PROF0]]
+int main(int argc, char *argv[]) {
+  foo(atoi(argv[1]));
+  return 0;
+}
+
+// CHECK-DAG: ![[PROF0]] = !{!"function_entry_count", i64 10000}
+// CHECK-DAG: ![[PROF1]] = !{!"branch_weights", i32 1, i32 1}
+// CHECK-DAG: ![[PROF2]] = !{!"branch_weights", i32 0, i32 1}
+
+// CHECK-ERROR-NOT: warning: {{.*}}: Found inconsistent block coverage
diff --git a/compiler-rt/test/profile/instrprof-coverage.c b/compiler-rt/test/profile/instrprof-entry-coverage.c
rename from compiler-rt/test/profile/instrprof-coverage.c
rename to compiler-rt/test/profile/instrprof-entry-coverage.c
diff --git a/llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h b/llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h
@@ -0,0 +1,86 @@
+//===-- BlockCoverageInference.h - Minimal Execution Coverage ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file finds the minimum set of blocks on a CFG that must be instrumented
+/// to infer execution coverage for the whole graph.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_BLOCKCOVERAGEINFERENCE_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_BLOCKCOVERAGEINFERENCE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class Function;
+class BasicBlock;
+class DotFuncBCIInfo;
+
+class BlockCoverageInference {
+  friend class DotFuncBCIInfo;
+
+public:
+  using BlockSet = SmallSetVector<const BasicBlock *, 4>;
+
+  BlockCoverageInference(const Function &F, bool ForceInstrumentEntry);
+
+  /// \return true if \p BB should be instrumented for coverage.
+  bool shouldInstrumentBlock(const BasicBlock &BB) const;
+
+  /// \return the set of blocks \p Deps such that \p BB is covered iff any
+  /// blocks in \p Deps are covered.
+  BlockSet getDependencies(const BasicBlock &BB) const;
+
+  /// \return a hash that depends on the set of instrumented blocks.
+  uint64_t getInstrumentedBlocksHash() const;
+
+  /// Dump the inference graph.
+  void dump(raw_ostream &OS) const;
+
+  /// View the inferred block coverage as a dot file.
+  /// Filled gray blocks are instrumented, red outlined blocks are found to be
+  /// covered, red edges show that a block's coverage can be inferred from its
+  /// successors, and blue edges show that a block's coverage can be inferred
+  /// from its predecessors.
+  void viewBlockCoverageGraph(
+      const DenseMap<const BasicBlock *, bool> *Coverage = nullptr) const;
+
+private:
+  const Function &F;
+  bool ForceInstrumentEntry;
+
+  /// Maps blocks to a minimal list of predecessors that can be used to infer
+  /// this block's coverage.
+  DenseMap<const BasicBlock *, BlockSet> PredecessorDependencies;
+
+  /// Maps blocks to a minimal list of successors that can be used to infer
+  /// this block's coverage.
+  DenseMap<const BasicBlock *, BlockSet> SuccessorDependencies;
+
+  /// Compute \p PredecessorDependencies and \p SuccessorDependencies.
+  void findDependencies();
+
+  /// Find the set of basic blocks that are reachable from \p Start without the
+  /// basic block \p Avoid.
+  void getReachableAvoiding(const BasicBlock &Start, const BasicBlock &Avoid,
+                            bool IsForward, BlockSet &Reachable) const;
+
+  static std::string getBlockNames(ArrayRef<const BasicBlock *> BBs);
+  static std::string getBlockNames(BlockSet BBs) {
+    return getBlockNames(ArrayRef<const BasicBlock *>(BBs.begin(), BBs.end()));
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_BLOCKCOVERAGEINFERENCE_H
diff --git a/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
@@ -0,0 +1,368 @@
+//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Our algorithm works by first identifying a subset of nodes that must always
+// be instrumented. We call these nodes ambiguous because knowing the coverage
+// of all remaining nodes is not enough to infer their coverage status.
+//
+// In general, a node v is ambiguous if there exist two entry-to-terminal paths
+// P_1 and P_2 such that:
+//   1. v is not in P_1 but P_1 visits a predecessor of v, and
+//   2. v is not in P_2 but P_2 visits a successor of v.
+//
+// If a node v is not ambiguous, then when condition 1 fails we can infer v's
+// coverage from the coverage of its predecessors, and when condition 2 fails
+// we can infer v's coverage from the coverage of its successors.
+//
+// Sadly, there are example CFGs where it is not possible to infer all nodes
+// from the ambiguous nodes alone. Our algorithm selects a minimum number of
+// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
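+//
+// As a small illustration (not from the paper): in the diamond CFG below, A is
+// ambiguous because the path Entry -> B -> Exit avoids A while visiting both a
+// predecessor of A (Entry) and a successor of A (Exit), so A's coverage cannot
+// be inferred from the other blocks and A must be instrumented. By symmetry,
+// the same holds for B.
+//
+//          Entry
+//          /   \
+//         A     B
+//          \   /
+//          Exit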
+//
+// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-block-coverage"
+
+STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
+STATISTIC(NumIneligibleFunctions,
+          "Number of functions that BCI cannot run on");
+STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
+STATISTIC(NumInstrumentedBlocks,
+          "Number of basic blocks instrumented for coverage");
+
+BlockCoverageInference::BlockCoverageInference(const Function &F,
+                                               bool ForceInstrumentEntry)
+    : F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
+  findDependencies();
+  assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
+
+  ++NumFunctions;
+  for (auto &BB : F) {
+    ++NumBlocks;
+    if (shouldInstrumentBlock(BB))
+      ++NumInstrumentedBlocks;
+  }
+}
+
+BlockCoverageInference::BlockSet
+BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
+  assert(BB.getParent() == &F);
+  BlockSet Dependencies;
+  auto It = PredecessorDependencies.find(&BB);
+  if (It != PredecessorDependencies.end())
+    Dependencies.set_union(It->second);
+  It = SuccessorDependencies.find(&BB);
+  if (It != SuccessorDependencies.end())
+    Dependencies.set_union(It->second);
+  return Dependencies;
+}
+
+uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
+  JamCRC JC;
+  uint64_t Index = 0;
+  for (auto &BB : F) {
+    if (shouldInstrumentBlock(BB)) {
+      uint8_t Data[8];
+      support::endian::write64le(Data, Index);
+      JC.update(Data);
+    }
+    Index++;
+  }
+  return JC.getCRC();
+}
+
+bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
+  assert(BB.getParent() == &F);
+  auto It = PredecessorDependencies.find(&BB);
+  if (It != PredecessorDependencies.end() && It->second.size())
+    return false;
+  It = SuccessorDependencies.find(&BB);
+  if (It != SuccessorDependencies.end() && It->second.size())
+    return false;
+  return true;
+}
+
+void BlockCoverageInference::findDependencies() {
+  assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
+  // Empirical analysis shows that this algorithm finishes within 5 seconds for
+  // functions with fewer than 1.5K blocks.
+  if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
+    ++NumIneligibleFunctions;
+    return;
+  }
+
+  SmallVector<const BasicBlock *> TerminalBlocks;
+  for (auto &BB : F)
+    if (succ_empty(&BB))
+      TerminalBlocks.push_back(&BB);
+
+  // Traverse the CFG backwards from the terminal blocks to make sure every
+  // block can reach some terminal block. Otherwise this algorithm will not
+  // work and we must fall back to instrumenting every block.
+  df_iterator_default_set<const BasicBlock *> Visited;
+  for (auto *BB : TerminalBlocks)
+    for (auto *N : inverse_depth_first_ext(BB, Visited))
+      (void)N;
+  if (F.size() != Visited.size()) {
+    ++NumIneligibleFunctions;
+    return;
+  }
+
+  // The current implementation for computing `PredecessorDependencies` and
+  // `SuccessorDependencies` runs in quadratic time with respect to the number
+  // of basic blocks. While we do have a more complicated linear-time algorithm
+  // in https://arxiv.org/abs/2208.13907, we do not know if it will give a
+  // significant speedup in practice given that most functions tend to be
+  // relatively small in size for intended use cases.
+  auto &EntryBlock = F.getEntryBlock();
+  for (auto &BB : F) {
+    // The set of blocks that are reachable while avoiding BB.
+    BlockSet ReachableFromEntry, ReachableFromTerminal;
+    getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
+                         ReachableFromEntry);
+    for (auto *TerminalBlock : TerminalBlocks)
+      getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
+                           ReachableFromTerminal);
+
+    auto Preds = predecessors(&BB);
+    bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
+      return ReachableFromEntry.count(Pred) &&
+             ReachableFromTerminal.count(Pred);
+    });
+    if (!HasSuperReachablePred)
+      for (auto *Pred : Preds)
+        if (ReachableFromEntry.count(Pred))
+          PredecessorDependencies[&BB].insert(Pred);
+
+    auto Succs = successors(&BB);
+    bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
+      return ReachableFromEntry.count(Succ) &&
+             ReachableFromTerminal.count(Succ);
+    });
+    if (!HasSuperReachableSucc)
+      for (auto *Succ : Succs)
+        if (ReachableFromTerminal.count(Succ))
+          SuccessorDependencies[&BB].insert(Succ);
+  }
+
+  if (ForceInstrumentEntry) {
+    // Force the entry block to be instrumented by clearing the blocks it can
+    // infer coverage from.
+    PredecessorDependencies[&EntryBlock].clear();
+    SuccessorDependencies[&EntryBlock].clear();
+  }
+
+  // Construct a graph where blocks are connected if there is a mutual
+  // dependency between them. This graph has a special property that it
+  // contains only paths.
+  DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
+  for (auto &BB : F) {
+    for (auto *Succ : successors(&BB)) {
+      if (SuccessorDependencies[&BB].count(Succ) &&
+          PredecessorDependencies[Succ].count(&BB)) {
+        AdjacencyList[&BB].insert(Succ);
+        AdjacencyList[Succ].insert(&BB);
+      }
+    }
+  }
+
+  // Given a path with at least one node, return the next node on the path.
+  auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
+    assert(Path.size());
+    auto &Neighbors = AdjacencyList[Path.back()];
+    if (Path.size() == 1) {
+      // This is the first node on the path, return its neighbor.
+      assert(Neighbors.size() == 1);
+      return Neighbors.front();
+    } else if (Neighbors.size() == 2) {
+      // This is the middle of the path, find the neighbor that is not on the
+      // path already.
+      assert(Path.size() >= 2);
+      return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
+    }
+    // This is the end of the path.
+    assert(Neighbors.size() == 1);
+    return nullptr;
+  };
+
+  // Remove all cycles in the inferencing graph.
+  for (auto &BB : F) {
+    if (AdjacencyList[&BB].size() == 1) {
+      // We found the head of some path.
+      BlockSet Path;
+      Path.insert(&BB);
+      while (const BasicBlock *Next = getNextOnPath(Path))
+        Path.insert(Next);
+      LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
+
+      // Remove these nodes from the graph so we don't discover this path
+      // again.
+      for (auto *BB : Path)
+        AdjacencyList[BB].clear();
+
+      // Finally, remove the cycles.
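+      // Each block on this path can infer its coverage from a neighbor in
+      // both directions, which would let coverage be "inferred" circularly
+      // with no block on the path actually instrumented. Clearing one
+      // dependency direction along the path leaves the inference flowing in
+      // a single direction, which breaks the cycle.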
+      if (PredecessorDependencies[Path.front()].size()) {
+        for (auto *BB : Path)
+          if (BB != Path.back())
+            SuccessorDependencies[BB].clear();
+      } else {
+        for (auto *BB : Path)
+          if (BB != Path.front())
+            PredecessorDependencies[BB].clear();
+      }
+    }
+  }
+  LLVM_DEBUG(dump(dbgs()));
+}
+
+void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
+                                                  const BasicBlock &Avoid,
+                                                  bool IsForward,
+                                                  BlockSet &Reachable) const {
+  df_iterator_default_set<const BasicBlock *> Visited;
+  Visited.insert(&Avoid);
+  if (IsForward) {
+    auto Range = depth_first_ext(&Start, Visited);
+    Reachable.insert(Range.begin(), Range.end());
+  } else {
+    auto Range = inverse_depth_first_ext(&Start, Visited);
+    Reachable.insert(Range.begin(), Range.end());
+  }
+}
+
+namespace llvm {
+class DotFuncBCIInfo {
+private:
+  const BlockCoverageInference *BCI;
+  const DenseMap<const BasicBlock *, bool> *Coverage;
+
+public:
+  DotFuncBCIInfo(const BlockCoverageInference *BCI,
+                 const DenseMap<const BasicBlock *, bool> *Coverage)
+      : BCI(BCI), Coverage(Coverage) {}
+
+  const Function &getFunction() { return BCI->F; }
+
+  bool isInstrumented(const BasicBlock *BB) const {
+    return BCI->shouldInstrumentBlock(*BB);
+  }
+
+  bool isCovered(const BasicBlock *BB) const {
+    return Coverage && Coverage->lookup(BB);
+  }
+
+  bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
+    return BCI->getDependencies(*Src).count(Dest);
+  }
+};
+
+template <>
+struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
+  static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
+    return &(Info->getFunction().getEntryBlock());
+  }
+
+  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+  using nodes_iterator = pointer_iterator<Function::const_iterator>;
+
+  static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
+    return nodes_iterator(Info->getFunction().begin());
+  }
+
+  static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
+    return nodes_iterator(Info->getFunction().end());
+  }
+
+  static size_t size(DotFuncBCIInfo *Info) {
+    return Info->getFunction().size();
+  }
+};
+
+template <>
+struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
+
+  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
+
+  static std::string getGraphName(DotFuncBCIInfo *Info) {
+    return "BCI CFG for " + Info->getFunction().getName().str();
+  }
+
+  std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
+    return Node->getName().str();
+  }
+
+  std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
+                                DotFuncBCIInfo *Info) {
+    const BasicBlock *Dest = *I;
+    if (Info->isDependent(Src, Dest))
+      return "color=red";
+    if (Info->isDependent(Dest, Src))
+      return "color=blue";
+    return "";
+  }
+
+  std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
+    std::string Result;
+    if (Info->isInstrumented(Node))
+      Result += "style=filled,fillcolor=gray";
+    if (Info->isCovered(Node))
+      Result += std::string(Result.empty() ? "" : ",") + "color=red";
+    return Result;
+  }
+};
+
+} // namespace llvm
+
+void BlockCoverageInference::viewBlockCoverageGraph(
+    const DenseMap<const BasicBlock *, bool> *Coverage) const {
+  DotFuncBCIInfo Info(this, Coverage);
+  WriteGraph(&Info, "BCI", false,
+             "Block Coverage Inference for " + F.getName());
+}
+
+void BlockCoverageInference::dump(raw_ostream &OS) const {
+  OS << "Minimal block coverage for function \'" << F.getName()
+     << "\' (Instrumented=*)\n";
+  for (auto &BB : F) {
+    OS << (shouldInstrumentBlock(BB) ? "* " : "  ") << BB.getName() << "\n";
"* " : " ") << BB.getName() << "\n"; + auto It = PredecessorDependencies.find(&BB); + if (It != PredecessorDependencies.end() && It->second.size()) + OS << " PredDeps = " << getBlockNames(It->second) << "\n"; + It = SuccessorDependencies.find(&BB); + if (It != SuccessorDependencies.end() && It->second.size()) + OS << " SuccDeps = " << getBlockNames(It->second) << "\n"; + } + OS << " Instrumented Blocks Hash = 0x" + << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n"; +} + +std::string +BlockCoverageInference::getBlockNames(ArrayRef BBs) { + std::string Result; + raw_string_ostream OS(Result); + OS << "["; + if (!BBs.empty()) { + OS << BBs.front()->getName(); + BBs = BBs.drop_front(); + } + for (auto *BB : BBs) + OS << ", " << BB->getName(); + OS << "]"; + return OS.str(); +} diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -5,6 +5,7 @@ ControlHeightReduction.cpp DataFlowSanitizer.cpp GCOVProfiling.cpp + BlockCoverageInference.cpp MemProfiler.cpp MemorySanitizer.cpp IndirectCallPromotion.cpp diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -540,18 +540,19 @@ // the instrumented function. This is counting the number of instrumented // target value sites to enter it as field in the profile data variable. for (Function &F : M) { - InstrProfIncrementInst *FirstProfIncInst = nullptr; + InstrProfInstBase *FirstProfInst = nullptr; for (BasicBlock &BB : F) for (auto I = BB.begin(), E = BB.end(); I != E; I++) if (auto *Ind = dyn_cast(I)) computeNumValueSiteCounts(Ind); - else if (FirstProfIncInst == nullptr) - FirstProfIncInst = dyn_cast(I); + else if (FirstProfInst == nullptr && + (isa(I) || isa(I))) + FirstProfInst = dyn_cast(I); // Value profiling intrinsic lowering requires per-function profile data // variable to be created first. - if (FirstProfIncInst != nullptr) - static_cast(getOrCreateRegionCounters(FirstProfIncInst)); + if (FirstProfInst != nullptr) + static_cast(getOrCreateRegionCounters(FirstProfInst)); } for (Function &F : M) diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -113,6 +113,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/MisExpect.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -160,6 +161,7 @@ STATISTIC(NumOfCSPGOMismatch, "Number of functions having mismatch profile in CSPGO."); STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); +STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed"); // Command line option to specify the file to read profile from. This is // mainly used for testing. 
@@ -269,6 +271,15 @@
     cl::desc(
         "Use this option to enable function entry coverage instrumentation."));
 
+static cl::opt<bool> PGOBlockCoverage(
+    "pgo-block-coverage",
+    cl::desc("Use this option to enable basic block coverage instrumentation"));
+
+static cl::opt<bool>
+    PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
+                              cl::desc("Create a dot file of CFGs with block "
+                                       "coverage inference information"));
+
 static cl::opt<bool>
     PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
                      cl::desc("Fix function entry count in profile use."));
@@ -384,6 +395,8 @@
   if (PGOFunctionEntryCoverage)
     ProfileVersion |=
        VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
+  if (PGOBlockCoverage)
+    ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
   auto IRLevelVersionVariable = new GlobalVariable(
       M, IntTy64, true, GlobalValue::WeakAnyLinkage,
       Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
@@ -416,8 +429,10 @@
   GlobalVariable *FuncNameVar = nullptr;
   uint64_t FuncHash = 0;
   PGOUseFunc *UseFunc = nullptr;
+  bool HasSingleByteCoverage;
 
-  SelectInstVisitor(Function &Func) : F(Func) {}
+  SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
+      : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
 
   void countSelects(Function &Func) {
     NSIs = 0;
@@ -535,6 +550,16 @@
   // The Minimum Spanning Tree of function CFG.
   CFGMST<Edge, BBInfo> MST;
 
+  const std::optional<BlockCoverageInference> BCI;
+
+  static std::optional<BlockCoverageInference>
+  constructBCI(Function &Func, bool HasSingleByteCoverage,
+               bool InstrumentFuncEntry) {
+    if (HasSingleByteCoverage)
+      return BlockCoverageInference(Func, InstrumentFuncEntry);
+    return {};
+  }
+
   // Collect all the BBs that will be instrumented, and store them in
   // InstrumentBBs.
   void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
@@ -560,10 +585,14 @@
       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
       BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
-      bool InstrumentFuncEntry = true)
+      bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
       : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
-        TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func),
-        MST(F, InstrumentFuncEntry, BPI, BFI) {
+        TLI(TLI), ValueSites(IPVK_Last + 1),
+        SIVisitor(Func, HasSingleByteCoverage),
+        MST(F, InstrumentFuncEntry, BPI, BFI),
+        BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
+    if (BCI && PGOViewBlockCoverageGraph)
+      BCI->viewBlockCoverageGraph();
     // This should be done before CFG hash computation.
     SIVisitor.countSelects(Func);
     ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
@@ -638,7 +667,11 @@
   updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
   updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
   updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
-  updateJCH((uint64_t)MST.AllEdges.size());
+  if (BCI) {
+    updateJCH(BCI->getInstrumentedBlocksHash());
+  } else {
+    updateJCH((uint64_t)MST.AllEdges.size());
+  }
 
   // Hash format for context sensitive profile. Reserve 4 bits for other
   // information.
@@ -731,6 +764,13 @@
 template <class Edge, class BBInfo>
 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
     std::vector<BasicBlock *> &InstrumentBBs) {
+  if (BCI) {
+    for (auto &BB : F)
+      if (BCI->shouldInstrumentBlock(BB))
+        InstrumentBBs.push_back(&BB);
+    return;
+  }
+
   // Use a worklist as we will update the vector during the iteration.
   std::vector<Edge *> EdgeList;
   EdgeList.reserve(MST.AllEdges.size());
@@ -853,12 +893,15 @@
     BlockFrequencyInfo *BFI,
     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
     bool IsCS) {
-  // Split indirectbr critical edges here before computing the MST rather than
-  // later in getInstrBB() to avoid invalidating it.
-  SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
+  if (!PGOBlockCoverage) {
+    // Split indirectbr critical edges here before computing the MST rather
+    // than later in getInstrBB() to avoid invalidating it.
+    SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
+  }
 
   FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
-      F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
+      F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
+      PGOBlockCoverage);
 
   Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
   auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
@@ -888,7 +931,9 @@
     // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
     //                          i32 <index>)
     Builder.CreateCall(
-        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+        Intrinsic::getDeclaration(M, PGOBlockCoverage
+                                         ? Intrinsic::instrprof_cover
+                                         : Intrinsic::instrprof_increment),
        {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
   }
@@ -1031,12 +1076,15 @@
   PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
              std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
              BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
-             ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
+             ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
+             bool HasSingleByteCoverage)
       : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
         FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
-                 InstrumentFuncEntry),
+                 InstrumentFuncEntry, HasSingleByteCoverage),
         FreqAttr(FFA_Normal), IsCS(IsCS) {}
 
+  void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
+
   // Read counts for the instrumented BB from profile.
   bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
                     InstrProfRecord::CountPseudoKind &PseudoKind);
@@ -1047,6 +1095,9 @@
   // Populate the counts for all BBs.
   void populateCounters();
 
+  // Set block coverage based on profile coverage values.
+  void populateCoverage(IndexedInstrProfReader *PGOReader);
+
   // Set the branch weights based on the count values.
   void setBranchWeights();
@@ -1495,6 +1546,46 @@
   return true;
 }
 
+void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
+  handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
+    auto &Ctx = M->getContext();
+    auto Err = IPE.get();
+    bool SkipWarning = false;
+    LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+                      << FuncInfo.FuncName << ": ");
+    if (Err == instrprof_error::unknown_function) {
+      IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
+      SkipWarning = !PGOWarnMissing;
+      LLVM_DEBUG(dbgs() << "unknown function");
+    } else if (Err == instrprof_error::hash_mismatch ||
+               Err == instrprof_error::malformed) {
+      IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
+      SkipWarning =
+          NoPGOWarnMismatch ||
+          (NoPGOWarnMismatchComdatWeak &&
+           (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
+            F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+      LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
+                        << " skip=" << SkipWarning << ")");
+      // Emit function metadata indicating PGO profile mismatch.
+      annotateFunctionWithHashMismatch(F, M->getContext());
+    }
+
+    LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
+    if (SkipWarning)
+      return;
+
+    std::string Msg =
+        IPE.message() + std::string(" ") + F.getName().str() +
+        std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
+        std::string(" up to ") + std::to_string(MismatchedFuncSum) +
+        std::string(" count discarded");
+
+    Ctx.diagnose(
+        DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+  });
+}
+
 // Read the profile from ProfileFileName and assign the value to the
 // instrumented BB and the edges. This function also updates ProgramMaxCount.
 // Return true if the profile is successfully read, and false on errors.
@@ -1505,42 +1596,7 @@
   Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
       FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
   if (Error E = Result.takeError()) {
-    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
-      auto Err = IPE.get();
-      bool SkipWarning = false;
-      LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
-                        << FuncInfo.FuncName << ": ");
-      if (Err == instrprof_error::unknown_function) {
-        IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
-        SkipWarning = !PGOWarnMissing;
-        LLVM_DEBUG(dbgs() << "unknown function");
-      } else if (Err == instrprof_error::hash_mismatch ||
-                 Err == instrprof_error::malformed) {
-        IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
-        SkipWarning =
-            NoPGOWarnMismatch ||
-            (NoPGOWarnMismatchComdatWeak &&
-             (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
-              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
-        LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
-                          << " skip=" << SkipWarning << ")");
-        // Emit function metadata indicating PGO profile mismatch.
-        annotateFunctionWithHashMismatch(F, M->getContext());
-      }
-
-      LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
-      if (SkipWarning)
-        return;
-
-      std::string Msg =
-          IPE.message() + std::string(" ") + F.getName().str() +
-          std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
-          std::string(" up to ") + std::to_string(MismatchedFuncSum) +
-          std::string(" count discarded");
-
-      Ctx.diagnose(
-          DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
-    });
+    handleInstrProfError(std::move(E), MismatchedFuncSum);
     return false;
   }
   ProfileRecord = std::move(Result.get());
@@ -1579,6 +1635,113 @@
   return true;
 }
 
+void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
+  uint64_t MismatchedFuncSum = 0;
+  Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
+      FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
+  if (auto Err = Result.takeError()) {
+    handleInstrProfError(std::move(Err), MismatchedFuncSum);
+    return;
+  }
+
+  std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
+  DenseMap<const BasicBlock *, bool> Coverage;
+  unsigned Index = 0;
+  for (auto &BB : F)
+    if (FuncInfo.BCI->shouldInstrumentBlock(BB))
+      Coverage[&BB] = (CountsFromProfile[Index++] != 0);
+  assert(Index == CountsFromProfile.size());
+
+  // For each B in InverseDependencies[A], if A is covered then B is covered.
+  DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
+      InverseDependencies;
+  for (auto &BB : F) {
+    for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
+      // If Dep is covered then BB is covered.
+      InverseDependencies[Dep].insert(&BB);
+    }
+  }
+
+  // Infer coverage of the non-instrumented blocks using a flood-fill
+  // algorithm.
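+  // For example, if instrumented block A is observed to be covered and block
+  // B has A in its dependency set, then B is marked covered and pushed so
+  // that blocks depending on B are visited in turn.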
+  std::stack<const BasicBlock *> CoveredBlocksToProcess;
+  for (auto &[BB, IsCovered] : Coverage)
+    if (IsCovered)
+      CoveredBlocksToProcess.push(BB);
+
+  while (!CoveredBlocksToProcess.empty()) {
+    auto *CoveredBlock = CoveredBlocksToProcess.top();
+    assert(Coverage[CoveredBlock]);
+    CoveredBlocksToProcess.pop();
+    for (auto *BB : InverseDependencies[CoveredBlock]) {
+      // If CoveredBlock is covered then BB is covered.
+      if (Coverage[BB])
+        continue;
+      Coverage[BB] = true;
+      CoveredBlocksToProcess.push(BB);
+    }
+  }
+
+  // Annotate block coverage.
+  MDBuilder MDB(F.getContext());
+  // We set the entry count to 10000 if the entry block is covered so that BFI
+  // can propagate a fraction of this count to the other covered blocks.
+  F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
+  for (auto &BB : F) {
+    // For a block A and its successor B, we set the edge weight as follows:
+    // If A is covered and B is covered, set weight=1.
+    // If A is covered and B is uncovered, set weight=0.
+    // If A is uncovered, set weight=1.
+    // This setup will allow BFI to give nonzero profile counts to only covered
+    // blocks.
+    SmallVector<uint32_t> Weights;
+    for (auto *Succ : successors(&BB))
+      Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
+    if (Weights.size() >= 2)
+      BB.getTerminator()->setMetadata(LLVMContext::MD_prof,
+                                      MDB.createBranchWeights(Weights));
+  }
+
+  unsigned NumCorruptCoverage = 0;
+  DominatorTree DT(F);
+  LoopInfo LI(DT);
+  BranchProbabilityInfo BPI(F, LI);
+  BlockFrequencyInfo BFI(F, BPI, LI);
+  auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
+    if (auto C = BFI.getBlockProfileCount(&BB))
+      return C == 0;
+    return {};
+  };
+  LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
+  for (auto &BB : F) {
+    LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : "  ")
+                      << (Coverage[&BB] ? "X " : "  ") << " " << BB.getName()
+                      << "\n");
+    // In some cases it is possible to find a covered block that has no covered
+    // successors, e.g., when a block calls a function that may call exit(). In
+    // those cases, BFI could find its successor to be covered while BCI could
+    // find its successor to be dead.
+    if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
+      LLVM_DEBUG(
+          dbgs() << "Found inconsistent block coverage for " << BB.getName()
                 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
+                 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
+      ++NumCorruptCoverage;
+    }
+    if (Coverage[&BB])
+      ++NumCoveredBlocks;
+  }
+  if (PGOVerifyBFI && NumCorruptCoverage) {
+    auto &Ctx = M->getContext();
+    Ctx.diagnose(DiagnosticInfoPGOProfile(
+        M->getName().data(),
+        Twine("Found inconsistent block coverage for function ") + F.getName() +
+            " in " + Twine(NumCorruptCoverage) + " blocks.",
+        DS_Warning));
+  }
+  if (PGOViewBlockCoverageGraph)
+    FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
+}
+
 // Populate the counters from instrumented BBs to all BBs.
 // In the end of this operation, all BBs should have a valid count value.
 void PGOUseFunc::populateCounters() {
@@ -1738,8 +1901,6 @@
 }
 
 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
-  if (PGOFunctionEntryCoverage)
-    return;
   Module *M = F.getParent();
   IRBuilder<> Builder(&SI);
   Type *Int64Ty = Builder.getInt64Ty();
@@ -1772,7 +1933,7 @@
 }
 
 void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
-  if (!PGOInstrSelect)
+  if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
     return;
   // FIXME: do not handle this yet.
   if (SI.getCondition()->getType()->isVectorTy())
     return;
@@ -2094,12 +2255,6 @@
         ProfileFileName.data(), "Not an IR level instrumentation profile"));
     return false;
   }
-  if (PGOReader->hasSingleByteCoverage()) {
-    Ctx.diagnose(DiagnosticInfoPGOProfile(
-        ProfileFileName.data(),
-        "Cannot use coverage profiles for optimization"));
-    return false;
-  }
   if (PGOReader->functionEntryOnly()) {
     Ctx.diagnose(DiagnosticInfoPGOProfile(
         ProfileFileName.data(),
@@ -2125,17 +2280,21 @@
   bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
   if (PGOInstrumentEntry.getNumOccurrences() > 0)
     InstrumentFuncEntry = PGOInstrumentEntry;
+  bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
   for (auto &F : M) {
     if (skipPGO(F))
       continue;
     auto &TLI = LookupTLI(F);
     auto *BPI = LookupBPI(F);
     auto *BFI = LookupBFI(F);
-    // Split indirectbr critical edges here before computing the MST rather than
-    // later in getInstrBB() to avoid invalidating it.
-    SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
+    if (!HasSingleByteCoverage) {
+      // Split indirectbr critical edges here before computing the MST rather
+      // than later in getInstrBB() to avoid invalidating it.
+      SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
+                                   BFI);
+    }
     PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
-                    InstrumentFuncEntry);
+                    InstrumentFuncEntry, HasSingleByteCoverage);
     // Read and match memprof first since we do this via debug info and can
     // match even if there is an IR mismatch detected for regular PGO below.
     if (PGOReader->hasMemoryProfile())
@@ -2144,6 +2303,10 @@
     if (!PGOReader->isIRLevelProfile())
       continue;
 
+    if (HasSingleByteCoverage) {
+      Func.populateCoverage(PGOReader.get());
+      continue;
+    }
     // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
     // it means the profile for the function is unrepresentative and this
    // function is actually hot / warm. We will reset the function hot / cold
We will reset the function hot / cold diff --git a/llvm/test/Transforms/PGOProfile/coverage.ll b/llvm/test/Transforms/PGOProfile/coverage.ll --- a/llvm/test/Transforms/PGOProfile/coverage.ll +++ b/llvm/test/Transforms/PGOProfile/coverage.ll @@ -1,26 +1,150 @@ -; RUN: opt < %s -passes=pgo-instr-gen -pgo-function-entry-coverage -S | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-gen -pgo-function-entry-coverage -S | FileCheck %s --implicit-check-not="instrprof.cover" --check-prefixes=CHECK,ENTRY +; RUN: opt < %s -passes=pgo-instr-gen -pgo-block-coverage -S | FileCheck %s --implicit-check-not="instrprof.cover" --check-prefixes=CHECK,BLOCK target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define i32 @foo(i32 %i) { +define void @foo() { +; CHECK-LABEL: entry: entry: - ; CHECK: call void @llvm.instrprof.cover({{.*}}) - %cmp = icmp sgt i32 %i, 0 - br i1 %cmp, label %if.then, label %if.else + ; ENTRY: call void @llvm.instrprof.cover({{.*}}) + %c = call i1 @choice() + br i1 %c, label %if.then, label %if.else +; CHECK-LABEL: if.then: if.then: - ; CHECK-NOT: llvm.instrprof.cover( - %add = add nsw i32 %i, 2 - %s = select i1 %cmp, i32 %add, i32 0 + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) br label %if.end +; CHECK-LABEL: if.else: if.else: - %sub = sub nsw i32 %i, 2 + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) + br label %if.end + +; CHECK-LABEL: if.end: +if.end: + ret void +} + +define void @bar() { +; CHECK-LABEL: entry: +entry: + ; ENTRY: call void @llvm.instrprof.cover({{.*}}) + %c = call i1 @choice() + br i1 %c, label %if.then, label %if.end + +; CHECK-LABEL: if.then: +if.then: + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) br label %if.end +; CHECK-LABEL: if.end: if.end: - %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ] - ret i32 %retv + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) + ret void +} + +define void @goo() { +; CHECK-LABEL: entry: +entry: + ; CHECK: call void @llvm.instrprof.cover({{.*}}) + ret void +} + +define void @loop() { +; CHECK-LABEL: entry: +entry: + ; CHECK: call void @llvm.instrprof.cover({{.*}}) + br label %while +while: + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) + br label %while +} + +; Function Attrs: noinline nounwind ssp uwtable +define void @hoo(i32 %a) #0 { +; CHECK-LABEL: entry: +entry: + ; ENTRY: call void @llvm.instrprof.cover({{.*}}) + %a.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %0 = load i32, i32* %a.addr, align 4 + %rem = srem i32 %0, 2 + %cmp = icmp eq i32 %rem, 0 + br i1 %cmp, label %if.then, label %if.else + +; CHECK-LABEL: if.then: +if.then: ; preds = %entry + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) + br label %if.end + +; CHECK-LABEL: if.else: +if.else: ; preds = %entry + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) + br label %if.end + +; CHECK-LABEL: if.end: +if.end: ; preds = %if.else, %if.then + store i32 1, i32* %i, align 4 + br label %for.cond + +; CHECK-LABEL: for.cond: +for.cond: ; preds = %for.inc, %if.end + %1 = load i32, i32* %i, align 4 + %2 = load i32, i32* %a.addr, align 4 + %cmp1 = icmp slt i32 %1, %2 + br i1 %cmp1, label %for.body, label %for.end + +; CHECK-LABEL: for.body: +for.body: ; preds = %for.cond + %3 = load i32, i32* %a.addr, align 4 + %rem2 = srem i32 %3, 3 + %cmp3 = icmp eq i32 %rem2, 0 + br i1 %cmp3, label %if.then4, label %if.else5 + +; CHECK-LABEL: if.then4: +if.then4: ; preds = %for.body + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) + 
br label %if.end10 + +; CHECK-LABEL: if.else5: +if.else5: ; preds = %for.body + %4 = load i32, i32* %a.addr, align 4 + %rem6 = srem i32 %4, 1001 + %cmp7 = icmp eq i32 %rem6, 0 + br i1 %cmp7, label %if.then8, label %if.end9 + +; CHECK-LABEL: if.then8: +if.then8: ; preds = %if.else5 + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) + br label %return + +; CHECK-LABEL: if.end9: +if.end9: ; preds = %if.else5 + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) + br label %if.end10 + +; CHECK-LABEL: if.end10: +if.end10: ; preds = %if.end9, %if.then4 + br label %for.inc + +; CHECK-LABEL: for.inc: +for.inc: ; preds = %if.end10 + %5 = load i32, i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +; CHECK-LABEL: for.end: +for.end: ; preds = %for.cond + ; BLOCK: call void @llvm.instrprof.cover({{.*}}) + br label %return + +; CHECK-LABEL: return: +return: ; preds = %for.end, %if.then8 + ret void } -; CHECK: declare void @llvm.instrprof.cover( +declare i1 @choice() + +; CHECK: declare void @llvm.instrprof.cover({{.*}})