diff --git a/compiler-rt/test/profile/instrprof-block-coverage.c b/compiler-rt/test/profile/instrprof-block-coverage.c
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-block-coverage.c
@@ -0,0 +1,47 @@
+// RUN: %clang_pgogen -mllvm -pgo-block-coverage %s -o %t.out
+// RUN: env LLVM_PROFILE_FILE=%t1.profraw %run %t.out 1
+// RUN: env LLVM_PROFILE_FILE=%t2.profraw %run %t.out 2
+// RUN: llvm-profdata merge -o %t.profdata %t1.profraw %t2.profraw
+// RUN: %clang_profuse=%t.profdata -mllvm -pgo-verify-bfi -o - -S -emit-llvm %s 2>%t.errs | FileCheck %s --implicit-check-not="!prof"
+// RUN: FileCheck %s < %t.errs --allow-empty --check-prefix=CHECK-ERROR
+
+#include <stdlib.h>
+
+// CHECK: @foo({{.*}})
+// CHECK-SAME: !prof ![[PROF0:[0-9]+]]
+void foo(int a) {
+  // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PROF1:[0-9]+]]
+  if (a % 2 == 0) {
+    //
+  } else {
+    //
+  }
+
+  // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PROF1]]
+  for (int i = 1; i < a; i++) {
+    // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PROF2:[0-9]+]]
+    if (a % 3 == 0) {
+      //
+    } else {
+      // CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}}, !prof ![[PROF2]]
+      if (a % 1001 == 0) {
+        return;
+      }
+    }
+  }
+
+  return;
+}
+
+// CHECK: @main({{.*}})
+// CHECK-SAME: !prof ![[PROF0]]
+int main(int argc, char *argv[]) {
+  foo(atoi(argv[1]));
+  return 0;
+}
+
+// CHECK-DAG: ![[PROF0]] = !{!"function_entry_count", i64 10000}
+// CHECK-DAG: ![[PROF1]] = !{!"branch_weights", i32 1, i32 1}
+// CHECK-DAG: ![[PROF2]] = !{!"branch_weights", i32 0, i32 1}
+
+// CHECK-ERROR-NOT: warning: {{.*}}: Found inconsistent block coverage
diff --git a/llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h b/llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h
@@ -0,0 +1,85 @@
+//===-- BlockCoverageInference.h - Minimal Execution Coverage ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file finds the minimum set of blocks on a CFG that must be instrumented
+/// to infer execution coverage for the whole graph.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_BLOCKCOVERAGEINFERENCE_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_BLOCKCOVERAGEINFERENCE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+
+namespace llvm {
+
+class Function;
+class BasicBlock;
+class DotFuncBCIInfo;
+class raw_ostream;
+
+class BlockCoverageInference {
+  friend class DotFuncBCIInfo;
+
+public:
+  using BlockSet = SmallSetVector<const BasicBlock *, 4>;
+
+  BlockCoverageInference(const Function &F, bool ForceInstrumentEntry);
+
+  /// \return true if \p BB should be instrumented for coverage.
+  bool shouldInstrumentBlock(const BasicBlock &BB) const;
+
+  /// \return the set of blocks such that \p BB is covered iff any of them are
+  /// covered.
+  BlockSet getDependencies(const BasicBlock &BB) const;
+
+  /// \return a hash that depends on the set of instrumented blocks.
+  uint64_t getInstrumentedBlocksHash() const;
+
+  /// Dump the inference graph.
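+  /// Each block is printed on its own line, prefixed with '*' if it is
+  /// instrumented, followed by its predecessor and successor dependency sets
+  /// when they are non-empty.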
+  void dump(raw_ostream &OS) const;
+
+  /// View the inferred block coverage as a dot file.
+  /// Filled gray blocks are instrumented, red outlined blocks are found to be
+  /// covered, red edges show that a block's coverage can be inferred from its
+  /// successors, and blue edges show that a block's coverage can be inferred
+  /// from its predecessors.
+  void viewBlockCoverageGraph(
+      const DenseMap<const BasicBlock *, bool> *Coverage = nullptr) const;
+
+private:
+  const Function &F;
+  bool ForceInstrumentEntry;
+
+  /// Maps blocks to a minimal list of predecessors that can be used to infer
+  /// this block's coverage.
+  DenseMap<const BasicBlock *, BlockSet> PredecessorDependencies;
+
+  /// Maps blocks to a minimal list of successors that can be used to infer
+  /// this block's coverage.
+  DenseMap<const BasicBlock *, BlockSet> SuccessorDependencies;
+
+  /// Compute \p PredecessorDependencies and \p SuccessorDependencies.
+  void findDependencies();
+
+  /// Find the set of basic blocks that are reachable from \p Start without the
+  /// basic block \p Avoid.
+  void getReachableAvoiding(const BasicBlock &Start, const BasicBlock &Avoid,
+                            bool IsForward, BlockSet &Reachable) const;
+
+  static std::string getBlockNames(ArrayRef<const BasicBlock *> BBs);
+  static std::string getBlockNames(BlockSet BBs) {
+    return getBlockNames(ArrayRef<const BasicBlock *>(BBs.begin(), BBs.end()));
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_BLOCKCOVERAGEINFERENCE_H
diff --git a/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
@@ -0,0 +1,364 @@
+//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Our algorithm works by first identifying a subset of nodes that must always
+// be instrumented. We call these nodes ambiguous because knowing the coverage
+// of all remaining nodes is not enough to infer their coverage status.
+//
+// In general, a node v is ambiguous if there exist two entry-to-terminal paths
+// P_1 and P_2 such that:
+//   1. v is not in P_1 but P_1 visits a predecessor of v, and
+//   2. v is not in P_2 but P_2 visits a successor of v.
+//
+// If a node v is not ambiguous, then at least one condition fails: if
+// condition 1 fails, we can infer v's coverage from the coverage of its
+// predecessors, and if condition 2 fails, we can infer v's coverage from the
+// coverage of its successors.
+//
+// Sadly, there are example CFGs where it is not possible to infer all nodes
+// from the ambiguous nodes alone. Our algorithm selects a minimum number of
+// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
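+//
+// For example, in the diamond CFG below, where A is the entry block and D is
+// the only terminal block,
+//
+//         A
+//        / \
+//       B   C
+//        \ /
+//         D
+//
+// B is ambiguous: the path A -> C -> D avoids B while visiting both a
+// predecessor of B (namely A) and a successor of B (namely D), so the coverage
+// of A, C, and D reveals nothing about B. By symmetry, C is ambiguous as well.
+// A and D are not ambiguous since every path visits them, and each is covered
+// iff B or C is covered, so instrumenting {B, C} is a valid (and minimal)
+// instrumentation for this graph.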
+//
+// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-block-coverage"
+
+STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
+STATISTIC(NumIneligibleFunctions,
+          "Number of functions that BCI cannot run on");
+STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
+STATISTIC(NumInstrumentedBlocks,
+          "Number of basic blocks instrumented for coverage");
+
+BlockCoverageInference::BlockCoverageInference(const Function &F,
+                                               bool ForceInstrumentEntry)
+    : F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
+  findDependencies();
+  assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
+
+  ++NumFunctions;
+  for (auto &BB : F) {
+    ++NumBlocks;
+    if (shouldInstrumentBlock(BB))
+      ++NumInstrumentedBlocks;
+  }
+}
+
+BlockCoverageInference::BlockSet
+BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
+  assert(BB.getParent() == &F);
+  BlockSet Dependencies;
+  auto It = PredecessorDependencies.find(&BB);
+  if (It != PredecessorDependencies.end())
+    Dependencies.set_union(It->second);
+  It = SuccessorDependencies.find(&BB);
+  if (It != SuccessorDependencies.end())
+    Dependencies.set_union(It->second);
+  return Dependencies;
+}
+
+uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
+  JamCRC JC;
+  uint64_t Index = 0;
+  for (auto &BB : F) {
+    if (shouldInstrumentBlock(BB)) {
+      uint8_t Data[8];
+      support::endian::write64le(Data, Index);
+      JC.update(Data);
+    }
+    Index++;
+  }
+  return JC.getCRC();
+}
+
+bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
+  assert(BB.getParent() == &F);
+  auto It = PredecessorDependencies.find(&BB);
+  if (It != PredecessorDependencies.end() && It->second.size())
+    return false;
+  It = SuccessorDependencies.find(&BB);
+  if (It != SuccessorDependencies.end() && It->second.size())
+    return false;
+  return true;
+}
+
+void BlockCoverageInference::findDependencies() {
+  assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
+  if (F.hasFnAttribute(Attribute::NoReturn)) {
+    ++NumIneligibleFunctions;
+    return;
+  }
+
+  SmallVector<const BasicBlock *> ExitBlocks;
+  for (auto &BB : F)
+    if (succ_empty(&BB))
+      ExitBlocks.push_back(&BB);
+
+  // Traverse the CFG backwards from the exit blocks to make sure every block
+  // can reach some exit block. Otherwise (e.g., in the presence of an infinite
+  // loop) this algorithm will not work and we must fall back to instrumenting
+  // every block.
+  df_iterator_default_set<const BasicBlock *> Visited;
+  for (auto *BB : ExitBlocks)
+    for (auto *N : inverse_depth_first_ext(BB, Visited))
+      (void)N;
+  if (F.size() != Visited.size()) {
+    ++NumIneligibleFunctions;
+    return;
+  }
+
+  // The current implementation for computing `PredecessorDependencies` and
+  // `SuccessorDependencies` runs in quadratic time with respect to the number
+  // of basic blocks. While we do have a more complicated linear-time algorithm
+  // in https://arxiv.org/abs/2208.13907, we do not know if it will give a
+  // significant speedup in practice given that most functions tend to be
+  // relatively small in size.
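+  // The loop below checks both ambiguity conditions from the file header
+  // directly: for each block BB we compute the set of blocks reachable from
+  // the entry and the set of blocks that can reach an exit, both while
+  // avoiding BB. A predecessor of BB that is in both sets lies on an
+  // entry-to-exit path avoiding BB (condition 1), so BB cannot be inferred
+  // from its predecessors; otherwise every entry-reachable predecessor becomes
+  // a dependency. Successor dependencies are computed symmetrically using
+  // condition 2.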
+  auto &EntryBlock = F.getEntryBlock();
+  for (auto &BB : F) {
+    // The set of blocks that are reachable while avoiding BB.
+    BlockSet ReachableFromEntry, ReachableFromExit;
+    getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
+                         ReachableFromEntry);
+    for (auto *ExitBlock : ExitBlocks)
+      getReachableAvoiding(*ExitBlock, BB, /*IsForward=*/false,
+                           ReachableFromExit);
+
+    auto Preds = predecessors(&BB);
+    bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
+      return ReachableFromEntry.count(Pred) && ReachableFromExit.count(Pred);
+    });
+    if (!HasSuperReachablePred)
+      for (auto *Pred : Preds)
+        if (ReachableFromEntry.count(Pred))
+          PredecessorDependencies[&BB].insert(Pred);
+
+    auto Succs = successors(&BB);
+    bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
+      return ReachableFromEntry.count(Succ) && ReachableFromExit.count(Succ);
+    });
+    if (!HasSuperReachableSucc)
+      for (auto *Succ : Succs)
+        if (ReachableFromExit.count(Succ))
+          SuccessorDependencies[&BB].insert(Succ);
+  }
+
+  if (ForceInstrumentEntry) {
+    // Force the entry block to be instrumented by clearing the blocks it can
+    // infer coverage from.
+    PredecessorDependencies[&EntryBlock].clear();
+    SuccessorDependencies[&EntryBlock].clear();
+  }
+
+  // Construct a graph where blocks are connected if there is a mutual
+  // dependency between them. This graph has the special property that it
+  // contains only paths.
+  DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
+  for (auto &BB : F) {
+    for (auto *Succ : successors(&BB)) {
+      if (SuccessorDependencies[&BB].count(Succ) &&
+          PredecessorDependencies[Succ].count(&BB)) {
+        AdjacencyList[&BB].insert(Succ);
+        AdjacencyList[Succ].insert(&BB);
+      }
+    }
+  }
+
+  // Given a path with at least one node, return the next node on the path.
+  auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
+    assert(Path.size());
+    auto &Neighbors = AdjacencyList[Path.back()];
+    if (Path.size() == 1) {
+      // This is the first node on the path, return its neighbor.
+      assert(Neighbors.size() == 1);
+      return Neighbors.front();
+    } else if (Neighbors.size() == 2) {
+      // This is the middle of the path, find the neighbor that is not on the
+      // path already.
+      assert(Path.size() >= 2);
+      return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
+    }
+    // This is the end of the path.
+    assert(Neighbors.size() == 1);
+    return nullptr;
+  };
+
+  // Remove all cycles in the inference graph.
+  for (auto &BB : F) {
+    if (AdjacencyList[&BB].size() == 1) {
+      // We found the head of some path.
+      BlockSet Path;
+      Path.insert(&BB);
+      while (const BasicBlock *Next = getNextOnPath(Path))
+        Path.insert(Next);
+      LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
+
+      // Remove these nodes from the graph so we don't discover this path
+      // again.
+      for (auto *BB : Path)
+        AdjacencyList[BB].clear();
+
+      // Finally, remove the cycles.
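+      // A mutual dependency along the path would let two blocks infer their
+      // coverage from each other, which is circular. Orient each path one way
+      // instead: if the head can be inferred from its own predecessors, clear
+      // the successor dependencies of every node but the tail so coverage is
+      // inferred from head to tail; otherwise clear the predecessor
+      // dependencies of every node but the head so coverage is inferred from
+      // tail to head.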
+      if (PredecessorDependencies[Path.front()].size()) {
+        for (auto *BB : Path)
+          if (BB != Path.back())
+            SuccessorDependencies[BB].clear();
+      } else {
+        for (auto *BB : Path)
+          if (BB != Path.front())
+            PredecessorDependencies[BB].clear();
+      }
+    }
+  }
+  LLVM_DEBUG(dump(dbgs()));
+}
+
+void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
+                                                  const BasicBlock &Avoid,
+                                                  bool IsForward,
+                                                  BlockSet &Reachable) const {
+  df_iterator_default_set<const BasicBlock *> Visited;
+  Visited.insert(&Avoid);
+  if (IsForward) {
+    auto Range = depth_first_ext(&Start, Visited);
+    Reachable.insert(Range.begin(), Range.end());
+  } else {
+    auto Range = inverse_depth_first_ext(&Start, Visited);
+    Reachable.insert(Range.begin(), Range.end());
+  }
+}
+
+namespace llvm {
+class DotFuncBCIInfo {
+private:
+  const BlockCoverageInference *BCI;
+  const DenseMap<const BasicBlock *, bool> *Coverage;
+
+public:
+  DotFuncBCIInfo(const BlockCoverageInference *BCI,
+                 const DenseMap<const BasicBlock *, bool> *Coverage)
+      : BCI(BCI), Coverage(Coverage) {}
+
+  const Function &getFunction() { return BCI->F; }
+
+  bool isInstrumented(const BasicBlock *BB) const {
+    return BCI->shouldInstrumentBlock(*BB);
+  }
+
+  bool isCovered(const BasicBlock *BB) const {
+    return Coverage && Coverage->lookup(BB);
+  }
+
+  bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
+    return BCI->getDependencies(*Src).count(Dest);
+  }
+};
+
+template <>
+struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
+  static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
+    return &(Info->getFunction().getEntryBlock());
+  }
+
+  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+  using nodes_iterator = pointer_iterator<Function::const_iterator>;
+
+  static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
+    return nodes_iterator(Info->getFunction().begin());
+  }
+
+  static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
+    return nodes_iterator(Info->getFunction().end());
+  }
+
+  static size_t size(DotFuncBCIInfo *Info) {
+    return Info->getFunction().size();
+  }
+};
+
+template <>
+struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
+
+  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
+
+  static std::string getGraphName(DotFuncBCIInfo *Info) {
+    return "BCI CFG for " + Info->getFunction().getName().str();
+  }
+
+  std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
+    return Node->getName().str();
+  }
+
+  std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
+                                DotFuncBCIInfo *Info) {
+    const BasicBlock *Dest = *I;
+    if (Info->isDependent(Src, Dest))
+      return "color=red";
+    if (Info->isDependent(Dest, Src))
+      return "color=blue";
+    return "";
+  }
+
+  std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
+    std::string Result;
+    if (Info->isInstrumented(Node))
+      Result += "style=filled,fillcolor=gray";
+    if (Info->isCovered(Node))
+      Result += std::string(Result.empty() ? "" : ",") + "color=red";
+    return Result;
+  }
+};
+
+} // namespace llvm
+
+void BlockCoverageInference::viewBlockCoverageGraph(
+    const DenseMap<const BasicBlock *, bool> *Coverage) const {
+  DotFuncBCIInfo Info(this, Coverage);
+  WriteGraph(&Info, "BCI", false,
+             "Block Coverage Inference for " + F.getName());
+}
+
+void BlockCoverageInference::dump(raw_ostream &OS) const {
+  OS << "Minimal block coverage for function \'" << F.getName()
+     << "\' (Instrumented=*)\n";
+  for (auto &BB : F) {
+    OS << (shouldInstrumentBlock(BB) ? "* " : "  ") << BB.getName() << "\n";
+    auto It = PredecessorDependencies.find(&BB);
+    if (It != PredecessorDependencies.end() && It->second.size())
+      OS << "    PredDeps = " << getBlockNames(It->second) << "\n";
+    It = SuccessorDependencies.find(&BB);
+    if (It != SuccessorDependencies.end() && It->second.size())
+      OS << "    SuccDeps = " << getBlockNames(It->second) << "\n";
+  }
+  OS << "  Instrumented Blocks Hash = 0x"
+     << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
+}
+
+std::string
+BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
+  std::string Result;
+  raw_string_ostream OS(Result);
+  OS << "[";
+  if (!BBs.empty()) {
+    OS << BBs.front()->getName();
+    BBs = BBs.drop_front();
+  }
+  for (auto *BB : BBs)
+    OS << ", " << BB->getName();
+  OS << "]";
+  return OS.str();
+}
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -5,6 +5,7 @@
   ControlHeightReduction.cpp
   DataFlowSanitizer.cpp
   GCOVProfiling.cpp
+  BlockCoverageInference.cpp
   MemProfiler.cpp
   MemorySanitizer.cpp
   IndirectCallPromotion.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -112,6 +112,7 @@
 #include "llvm/Support/HashBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/MisExpect.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -158,6 +159,7 @@
 STATISTIC(NumOfCSPGOMismatch,
           "Number of functions having mismatch profile in CSPGO.");
 STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
+STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
 
 // Command line option to specify the file to read profile from. This is
 // mainly used for testing.
@@ -267,6 +269,15 @@
     cl::desc(
         "Use this option to enable function entry coverage instrumentation."));
 
+static cl::opt<bool> PGOBlockCoverage(
+    "pgo-block-coverage",
+    cl::desc("Use this option to enable basic block coverage instrumentation"));
+
+static cl::opt<bool>
+    PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
+                              cl::desc("Create a dot file of CFGs with block "
+                                       "coverage inference information"));
+
 static cl::opt<bool>
     PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
                      cl::desc("Fix function entry count in profile use."));
@@ -382,6 +393,8 @@
   if (PGOFunctionEntryCoverage)
     ProfileVersion |=
         VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
+  if (PGOBlockCoverage)
+    ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
   auto IRLevelVersionVariable = new GlobalVariable(
       M, IntTy64, true, GlobalValue::WeakAnyLinkage,
       Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
@@ -414,8 +427,10 @@
   GlobalVariable *FuncNameVar = nullptr;
   uint64_t FuncHash = 0;
   PGOUseFunc *UseFunc = nullptr;
+  bool HasSingleByteCoverage;
 
-  SelectInstVisitor(Function &Func) : F(Func) {}
+  SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
+      : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
 
   void countSelects(Function &Func) {
     NSIs = 0;
@@ -533,6 +548,16 @@
   // The Minimum Spanning Tree of function CFG.
   CFGMST<Edge, BBInfo> MST;
 
+  const Optional<BlockCoverageInference> BCI;
+
+  static Optional<BlockCoverageInference>
+  constructBCI(Function &Func, bool HasSingleByteCoverage,
+               bool InstrumentFuncEntry) {
+    if (HasSingleByteCoverage)
+      return BlockCoverageInference(Func, InstrumentFuncEntry);
+    return None;
+  }
+
   // Collect all the BBs that will be instrumented, and store them in
   // InstrumentBBs.
   void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
@@ -558,10 +583,14 @@
       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
       BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
-      bool InstrumentFuncEntry = true)
+      bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
       : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
-        TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func),
-        MST(F, InstrumentFuncEntry, BPI, BFI) {
+        TLI(TLI), ValueSites(IPVK_Last + 1),
+        SIVisitor(Func, HasSingleByteCoverage),
+        MST(F, InstrumentFuncEntry, BPI, BFI),
+        BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
+    if (BCI && PGOViewBlockCoverageGraph)
+      BCI->viewBlockCoverageGraph();
     // This should be done before CFG hash computation.
     SIVisitor.countSelects(Func);
     ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
@@ -636,7 +665,11 @@
     updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
     updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
     updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
-    updateJCH((uint64_t)MST.AllEdges.size());
+    if (BCI) {
+      updateJCH(BCI->getInstrumentedBlocksHash());
+    } else {
+      updateJCH((uint64_t)MST.AllEdges.size());
+    }
 
     // Hash format for context sensitive profile. Reserve 4 bits for other
     // information.
@@ -729,6 +762,13 @@
 template <class Edge, class BBInfo>
 void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
     std::vector<BasicBlock *> &InstrumentBBs) {
+  if (BCI) {
+    for (auto &BB : F)
+      if (BCI->shouldInstrumentBlock(BB))
+        InstrumentBBs.push_back(&BB);
+    return;
+  }
+
   // Use a worklist as we will update the vector during the iteration.
   std::vector<Edge *> EdgeList;
   EdgeList.reserve(MST.AllEdges.size());
@@ -851,12 +891,15 @@
     BlockFrequencyInfo *BFI,
     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
     bool IsCS) {
-  // Split indirectbr critical edges here before computing the MST rather than
-  // later in getInstrBB() to avoid invalidating it.
-  SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
+  if (!PGOBlockCoverage) {
+    // Split indirectbr critical edges here before computing the MST rather
+    // than later in getInstrBB() to avoid invalidating it.
+    SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
+  }
 
   FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
-      F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
+      F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
+      PGOBlockCoverage);
 
   Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
   auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
@@ -886,7 +929,9 @@
     // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
    //                           i32 <index>)
     Builder.CreateCall(
-        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+        Intrinsic::getDeclaration(M, PGOBlockCoverage
+                                         ? Intrinsic::instrprof_cover
+                                         : Intrinsic::instrprof_increment),
         {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
   }
@@ -1029,12 +1074,15 @@
   PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
              std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
              BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
-             ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
+             ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
+             bool HasSingleByteCoverage)
       : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
         FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
-                 InstrumentFuncEntry),
+                 InstrumentFuncEntry, HasSingleByteCoverage),
         FreqAttr(FFA_Normal), IsCS(IsCS) {}
 
+  void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
+
   // Read counts for the instrumented BB from profile.
   bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
                     InstrProfRecord::CountPseudoKind &PseudoKind);
@@ -1045,6 +1093,9 @@
   // Populate the counts for all BBs.
   void populateCounters();
 
+  // Set block coverage based on profile coverage values.
+  void populateCoverage(IndexedInstrProfReader *PGOReader);
+
   // Set the branch weights based on the count values.
   void setBranchWeights();
@@ -1493,6 +1544,46 @@
   return true;
 }
 
+void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
+  handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
+    auto &Ctx = M->getContext();
+    auto Err = IPE.get();
+    bool SkipWarning = false;
+    LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+                      << FuncInfo.FuncName << ": ");
+    if (Err == instrprof_error::unknown_function) {
+      IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
+      SkipWarning = !PGOWarnMissing;
+      LLVM_DEBUG(dbgs() << "unknown function");
+    } else if (Err == instrprof_error::hash_mismatch ||
+               Err == instrprof_error::malformed) {
+      IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
+      SkipWarning =
+          NoPGOWarnMismatch ||
+          (NoPGOWarnMismatchComdatWeak &&
+           (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
+            F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+      LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
+                        << " skip=" << SkipWarning << ")");
+      // Emit function metadata indicating PGO profile mismatch.
+      annotateFunctionWithHashMismatch(F, M->getContext());
+    }
+
+    LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
+    if (SkipWarning)
+      return;
+
+    std::string Msg =
+        IPE.message() + std::string(" ") + F.getName().str() +
+        std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
+        std::string(" up to ") + std::to_string(MismatchedFuncSum) +
+        std::string(" count discarded");
+
+    Ctx.diagnose(
+        DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+  });
+}
+
 // Read the profile from ProfileFileName and assign the value to the
 // instrumented BB and the edges. This function also updates ProgramMaxCount.
 // Return true if the profile are successfully read, and false on errors.
@@ -1503,42 +1594,7 @@
   Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
       FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
   if (Error E = Result.takeError()) {
-    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
-      auto Err = IPE.get();
-      bool SkipWarning = false;
-      LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
-                        << FuncInfo.FuncName << ": ");
-      if (Err == instrprof_error::unknown_function) {
-        IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
-        SkipWarning = !PGOWarnMissing;
-        LLVM_DEBUG(dbgs() << "unknown function");
-      } else if (Err == instrprof_error::hash_mismatch ||
-                 Err == instrprof_error::malformed) {
-        IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
-        SkipWarning =
-            NoPGOWarnMismatch ||
-            (NoPGOWarnMismatchComdatWeak &&
-             (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
-              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
-        LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
-                          << " skip=" << SkipWarning << ")");
-        // Emit function metadata indicating PGO profile mismatch.
-        annotateFunctionWithHashMismatch(F, M->getContext());
-      }
-
-      LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
-      if (SkipWarning)
-        return;
-
-      std::string Msg =
-          IPE.message() + std::string(" ") + F.getName().str() +
-          std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
-          std::string(" up to ") + std::to_string(MismatchedFuncSum) +
-          std::string(" count discarded");
-
-      Ctx.diagnose(
-          DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
-    });
+    handleInstrProfError(std::move(E), MismatchedFuncSum);
     return false;
   }
   ProfileRecord = std::move(Result.get());
@@ -1577,6 +1633,102 @@
   return true;
 }
 
+void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
+  uint64_t MismatchedFuncSum = 0;
+  Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
+      FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
+  if (auto Err = Result.takeError()) {
+    handleInstrProfError(std::move(Err), MismatchedFuncSum);
+    return;
+  }
+
+  std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
+  DenseMap<const BasicBlock *, bool> Coverage;
+  unsigned Index = 0;
+  for (auto &BB : F)
+    if (FuncInfo.BCI->shouldInstrumentBlock(BB))
+      Coverage[&BB] = (CountsFromProfile[Index++] != 0);
+  assert(Index == CountsFromProfile.size());
+
+  // Infer coverage of the non-instrumented blocks.
+  bool WasChanged = true;
+  while (WasChanged) {
+    WasChanged = false;
+    // It is possible to infer coverage in one pass, but the speedup might not
+    // be impactful for small CFGs.
+    for (auto &BB : F) {
+      if (Coverage.count(&BB))
+        continue;
+      for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
+        if (Coverage.count(Dep) && Coverage[Dep]) {
+          Coverage[&BB] = true;
+          WasChanged = true;
+          break;
+        }
+      }
+    }
+  }
+
+  // Annotate block coverage.
+  MDBuilder MDB(F.getContext());
+
+  // We set the entry count to 10000 if the entry block is covered so that BFI
+  // can propagate a fraction of this count to the other covered blocks.
+  F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
+  for (auto &BB : F) {
+    // For a block A and its successor B, we set the edge weight as follows:
+    //   If A is covered and B is covered, set weight=1.
+    //   If A is covered and B is uncovered, set weight=0.
+    //   If A is uncovered, set weight=1.
+    // This setup will allow BFI to give nonzero profile counts to only covered
+    // blocks.
+    SmallVector<uint32_t> Weights;
+    for (auto *Succ : successors(&BB))
+      Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
+    if (Weights.size() >= 2)
+      BB.getTerminator()->setMetadata(LLVMContext::MD_prof,
+                                      MDB.createBranchWeights(Weights));
+  }
+
+  unsigned NumCorruptCoverage = 0;
+  DominatorTree DT(F);
+  LoopInfo LI(DT);
+  BranchProbabilityInfo BPI(F, LI);
+  BlockFrequencyInfo BFI(F, BPI, LI);
+  auto IsBlockDead = [&](const BasicBlock &BB) {
+    return BFI.getBlockProfileCount(&BB).transform(
+        [](auto C) { return C == 0; });
+  };
+  LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
+  for (auto &BB : F) {
+    LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : "  ")
+                      << (Coverage[&BB] ? "X " : "  ") << " " << BB.getName()
+                      << "\n");
+    // In some cases it is possible to find a covered block that has no covered
+    // successors, e.g., when a block calls a function that may call exit(). In
+    // those cases, BFI could find its successor to be covered while BCI could
+    // find its successor to be dead.
+    if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
+      LLVM_DEBUG(
+          dbgs() << "Found inconsistent block coverage for " << BB.getName()
                 << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
+                 << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
+      NumCorruptCoverage++;
+    }
+    if (Coverage[&BB])
+      ++NumCoveredBlocks;
+  }
+  if (PGOVerifyBFI && NumCorruptCoverage) {
+    auto &Ctx = M->getContext();
+    Ctx.diagnose(DiagnosticInfoPGOProfile(
+        M->getName().data(),
+        Twine("Found inconsistent block coverage for function ") + F.getName() +
+            " in " + Twine(NumCorruptCoverage) + " blocks.",
+        DS_Warning));
+  }
+  if (PGOViewBlockCoverageGraph)
+    FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
+}
+
 // Populate the counters from instrumented BBs to all BBs.
 // In the end of this operation, all BBs should have a valid count value.
 void PGOUseFunc::populateCounters() {
@@ -1736,8 +1888,6 @@
 }
 
 void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
-  if (PGOFunctionEntryCoverage)
-    return;
   Module *M = F.getParent();
   IRBuilder<> Builder(&SI);
   Type *Int64Ty = Builder.getInt64Ty();
@@ -1770,7 +1920,9 @@
 }
 
 void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
-  if (!PGOInstrSelect)
+  // TODO: We can instrument coverage on selects by adding a new
+  // llvm.instrprof.cover.select intrinsic.
+  if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
     return;
   // FIXME: do not handle this yet.
   if (SI.getCondition()->getType()->isVectorTy())
@@ -2091,12 +2243,6 @@
         ProfileFileName.data(), "Not an IR level instrumentation profile"));
     return false;
   }
-  if (PGOReader->hasSingleByteCoverage()) {
-    Ctx.diagnose(DiagnosticInfoPGOProfile(
-        ProfileFileName.data(),
-        "Cannot use coverage profiles for optimization"));
-    return false;
-  }
   if (PGOReader->functionEntryOnly()) {
     Ctx.diagnose(DiagnosticInfoPGOProfile(
         ProfileFileName.data(),
@@ -2122,17 +2268,21 @@
   bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
   if (PGOInstrumentEntry.getNumOccurrences() > 0)
     InstrumentFuncEntry = PGOInstrumentEntry;
+  bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
   for (auto &F : M) {
     if (skipPGO(F))
      continue;
     auto &TLI = LookupTLI(F);
     auto *BPI = LookupBPI(F);
     auto *BFI = LookupBFI(F);
-    // Split indirectbr critical edges here before computing the MST rather than
-    // later in getInstrBB() to avoid invalidating it.
-    SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
+    if (!HasSingleByteCoverage) {
+      // Split indirectbr critical edges here before computing the MST rather
+      // than later in getInstrBB() to avoid invalidating it.
+      SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
+                                   BFI);
+    }
     PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
-                    InstrumentFuncEntry);
+                    InstrumentFuncEntry, HasSingleByteCoverage);
     // Read and match memprof first since we do this via debug info and can
     // match even if there is an IR mismatch detected for regular PGO below.
     if (PGOReader->hasMemoryProfile())
@@ -2141,6 +2291,10 @@
     if (!PGOReader->isIRLevelProfile())
       continue;
 
+    if (HasSingleByteCoverage) {
+      Func.populateCoverage(PGOReader.get());
+      continue;
+    }
     // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
     // it means the profile for the function is unrepresentative and this
     // function is actually hot / warm. We will reset the function hot / cold
diff --git a/llvm/test/Transforms/PGOProfile/coverage.ll b/llvm/test/Transforms/PGOProfile/coverage.ll
--- a/llvm/test/Transforms/PGOProfile/coverage.ll
+++ b/llvm/test/Transforms/PGOProfile/coverage.ll
@@ -1,26 +1,150 @@
-; RUN: opt < %s -passes=pgo-instr-gen -pgo-function-entry-coverage -S | FileCheck %s
+; RUN: opt < %s -passes=pgo-instr-gen -pgo-function-entry-coverage -S | FileCheck %s --implicit-check-not="instrprof.cover" --check-prefixes=CHECK,ENTRY
+; RUN: opt < %s -passes=pgo-instr-gen -pgo-block-coverage -S | FileCheck %s --implicit-check-not="instrprof.cover" --check-prefixes=CHECK,BLOCK
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define i32 @foo(i32 %i) {
+define void @foo() {
+; CHECK-LABEL: entry:
 entry:
-  ; CHECK: call void @llvm.instrprof.cover({{.*}})
-  %cmp = icmp sgt i32 %i, 0
-  br i1 %cmp, label %if.then, label %if.else
+  ; ENTRY: call void @llvm.instrprof.cover({{.*}})
+  %c = call i1 @choice()
+  br i1 %c, label %if.then, label %if.else
 
+; CHECK-LABEL: if.then:
 if.then:
-  ; CHECK-NOT: llvm.instrprof.cover(
-  %add = add nsw i32 %i, 2
-  %s = select i1 %cmp, i32 %add, i32 0
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
   br label %if.end
 
+; CHECK-LABEL: if.else:
 if.else:
-  %sub = sub nsw i32 %i, 2
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
+  br label %if.end
+
+; CHECK-LABEL: if.end:
+if.end:
+  ret void
+}
+
+define void @bar() {
+; CHECK-LABEL: entry:
+entry:
+  ; ENTRY: call void @llvm.instrprof.cover({{.*}})
+  %c = call i1 @choice()
+  br i1 %c, label %if.then, label %if.end
+
+; CHECK-LABEL: if.then:
+if.then:
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
   br label %if.end
 
+; CHECK-LABEL: if.end:
 if.end:
-  %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
-  ret i32 %retv
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
+  ret void
+}
+
+define void @goo() {
+; CHECK-LABEL: entry:
+entry:
+  ; CHECK: call void @llvm.instrprof.cover({{.*}})
+  ret void
+}
+
+define void @loop() {
+; CHECK-LABEL: entry:
+entry:
+  ; CHECK: call void @llvm.instrprof.cover({{.*}})
+  br label %while
+
+while:
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
+  br label %while
+}
+
+; Function Attrs: noinline nounwind ssp uwtable
+define void @hoo(i32 %a) #0 {
+; CHECK-LABEL: entry:
+entry:
+  ; ENTRY: call void @llvm.instrprof.cover({{.*}})
+  %a.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %rem = srem i32 %0, 2
+  %cmp = icmp eq i32 %rem, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+; CHECK-LABEL: if.then:
+if.then:                                          ; preds = %entry
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
+  br label %if.end
+
+; CHECK-LABEL: if.else:
+if.else:                                          ; preds = %entry
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
+  br label %if.end
+
+; CHECK-LABEL: if.end:
+if.end:                                           ; preds = %if.else, %if.then
+  store i32 1, i32* %i, align 4
+  br label %for.cond
+
+; CHECK-LABEL: for.cond:
+for.cond:                                         ; preds = %for.inc, %if.end
+  %1 = load i32, i32* %i, align 4
+  %2 = load i32, i32* %a.addr, align 4
+  %cmp1 = icmp slt i32 %1, %2
+  br i1 %cmp1, label %for.body, label %for.end
+
+; CHECK-LABEL: for.body:
+for.body:                                         ; preds = %for.cond
+  %3 = load i32, i32* %a.addr, align 4
+  %rem2 = srem i32 %3, 3
+  %cmp3 = icmp eq i32 %rem2, 0
+  br i1 %cmp3, label %if.then4, label %if.else5
+
+; CHECK-LABEL: if.then4:
+if.then4:                                         ; preds = %for.body
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
+  br label %if.end10
+
+; CHECK-LABEL: if.else5:
+if.else5:                                         ; preds = %for.body
+  %4 = load i32, i32* %a.addr, align 4
+  %rem6 = srem i32 %4, 1001
+  %cmp7 = icmp eq i32 %rem6, 0
+  br i1 %cmp7, label %if.then8, label %if.end9
+
+; CHECK-LABEL: if.then8:
+if.then8:                                         ; preds = %if.else5
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
+  br label %return
+
+; CHECK-LABEL: if.end9:
+if.end9:                                          ; preds = %if.else5
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
+  br label %if.end10
+
+; CHECK-LABEL: if.end10:
+if.end10:                                         ; preds = %if.end9, %if.then4
+  br label %for.inc
+
+; CHECK-LABEL: for.inc:
+for.inc:                                          ; preds = %if.end10
+  %5 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %5, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+; CHECK-LABEL: for.end:
+for.end:                                          ; preds = %for.cond
+  ; BLOCK: call void @llvm.instrprof.cover({{.*}})
+  br label %return
+
+; CHECK-LABEL: return:
+return:                                           ; preds = %for.end, %if.then8
+  ret void
 }
 
-; CHECK: declare void @llvm.instrprof.cover(
+declare i1 @choice()
+
+attributes #0 = { noinline nounwind ssp uwtable }
+
+; CHECK: declare void @llvm.instrprof.cover({{.*}})