diff --git a/bolt/include/bolt/Core/HashUtilities.h b/bolt/include/bolt/Core/HashUtilities.h
--- a/bolt/include/bolt/Core/HashUtilities.h
+++ b/bolt/include/bolt/Core/HashUtilities.h
@@ -20,8 +20,6 @@
 namespace llvm {
 namespace bolt {
 
-uint16_t hash_64_to_16(const uint64_t Hash);
-
 std::string hashInteger(uint64_t Value);
 
 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol);
@@ -35,6 +33,8 @@
 std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
                       OperandHashFuncTy OperandHashFunc);
 
+std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB);
+
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/lib/Core/HashUtilities.cpp b/bolt/lib/Core/HashUtilities.cpp
--- a/bolt/lib/Core/HashUtilities.cpp
+++ b/bolt/lib/Core/HashUtilities.cpp
@@ -130,5 +130,42 @@
   return HashString;
 }
 
+/// A "loose" hash of a basic block to use with the stale profile matching. The
+/// computed value will be the same for blocks with minor changes (such as
+/// reordering of instructions or using different operands) but may result in
+/// collisions that need to be resolved by a stronger hashing.
+std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
+  // The hash is computed by creating a string of all lexicographically ordered
+  // instruction opcodes, which is then hashed with std::hash.
+  std::set<std::string> Opcodes;
+  for (const MCInst &Inst : BB) {
+    if (BC.MIB->isPseudo(Inst))
+      continue;
+
+    // Ignore unconditional jumps, as they can be added / removed as a result
+    // of basic block reordering.
+    if (BC.MIB->isUnconditionalBranch(Inst))
+      continue;
+
+    // Do not distinguish different types of conditional jumps.
+    if (BC.MIB->isConditionalBranch(Inst)) {
+      Opcodes.insert("JMP");
+      continue;
+    }
+
+    std::string Mnemonic = BC.InstPrinter->getMnemonic(&Inst).first;
+    Mnemonic.erase(
+        std::remove_if(Mnemonic.begin(), Mnemonic.end(),
+                       [](unsigned char ch) { return std::isspace(ch); }),
+        Mnemonic.end());
+    Opcodes.insert(Mnemonic);
+  }
+
+  std::string HashString;
+  for (const std::string &Opcode : Opcodes)
+    HashString.append(Opcode);
+  return HashString;
+}
+
 } // namespace bolt
 } // namespace llvm
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -73,64 +73,39 @@
 
 cl::opt<unsigned> StaleMatchingCostBlockInc(
     "stale-matching-cost-block-inc",
-    cl::desc("The cost of increasing a block's count by one."), cl::init(110),
+    cl::desc("The cost of increasing a block count by one."), cl::init(150),
     cl::ReallyHidden, cl::cat(BoltOptCategory));
 
 cl::opt<unsigned> StaleMatchingCostBlockDec(
     "stale-matching-cost-block-dec",
-    cl::desc("The cost of decreasing a block's count by one."), cl::init(100),
+    cl::desc("The cost of decreasing a block count by one."), cl::init(150),
     cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-cl::opt<unsigned> StaleMatchingCostBlockEntryInc(
-    "stale-matching-cost-block-entry-inc",
-    cl::desc("The cost of increasing the entry block's count by one."),
-    cl::init(110), cl::ReallyHidden, cl::cat(BoltOptCategory));
-
-cl::opt<unsigned> StaleMatchingCostBlockEntryDec(
-    "stale-matching-cost-block-entry-dec",
-    cl::desc("The cost of decreasing the entry block's count by one."),
-    cl::init(100), cl::ReallyHidden, cl::cat(BoltOptCategory));
-
-cl::opt<unsigned> StaleMatchingCostBlockZeroInc(
-    "stale-matching-cost-block-zero-inc",
-    cl::desc("The cost of increasing a count of zero-weight block by one."),
-    cl::init(10), cl::Hidden, cl::cat(BoltOptCategory));
-
-cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
-    "stale-matching-cost-block-unknown-inc",
-    cl::desc("The cost of increasing an unknown block's count by one."),
-    cl::init(10), cl::ReallyHidden, cl::cat(BoltOptCategory));
-
 cl::opt<unsigned> StaleMatchingCostJumpInc(
     "stale-matching-cost-jump-inc",
-    cl::desc("The cost of increasing a jump's count by one."), cl::init(100),
+    cl::desc("The cost of increasing a jump count by one."), cl::init(150),
     cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-cl::opt<unsigned> StaleMatchingCostJumpFTInc(
-    "stale-matching-cost-jump-ft-inc",
-    cl::desc("The cost of increasing a fall-through jump's count by one."),
-    cl::init(100), cl::ReallyHidden, cl::cat(BoltOptCategory));
-
 cl::opt<unsigned> StaleMatchingCostJumpDec(
     "stale-matching-cost-jump-dec",
-    cl::desc("The cost of decreasing a jump's count by one."), cl::init(110),
+    cl::desc("The cost of decreasing a jump count by one."), cl::init(150),
     cl::ReallyHidden, cl::cat(BoltOptCategory));
 
-cl::opt<unsigned> StaleMatchingCostJumpFTDec(
-    "stale-matching-cost-jump-ft-dec",
-    cl::desc("The cost of decreasing a fall-through jump's count by one."),
-    cl::init(110), cl::ReallyHidden, cl::cat(BoltOptCategory));
+cl::opt<unsigned> StaleMatchingCostBlockUnknownInc(
+    "stale-matching-cost-block-unknown-inc",
+    cl::desc("The cost of increasing an unknown block count by one."),
+    cl::init(1), cl::ReallyHidden, cl::cat(BoltOptCategory));
 
 cl::opt<unsigned> StaleMatchingCostJumpUnknownInc(
     "stale-matching-cost-jump-unknown-inc",
-    cl::desc("The cost of increasing an unknown jump's count by one."),
-    cl::init(50), cl::ReallyHidden, cl::cat(BoltOptCategory));
+    cl::desc("The cost of increasing an unknown jump count by one."),
+    cl::init(140), cl::ReallyHidden, cl::cat(BoltOptCategory));
 
 cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
     "stale-matching-cost-jump-unknown-ft-inc",
     cl::desc(
-        "The cost of increasing an unknown fall-through jump's count by one."),
-    cl::init(5), cl::ReallyHidden, cl::cat(BoltOptCategory));
+        "The cost of increasing an unknown fall-through jump count by one."),
+    cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory));
 
 } // namespace opts
 
@@ -145,7 +120,8 @@
   using ValueOffset = Bitfield::Element<uint16_t, 0, 16>;
   using ValueOpcode = Bitfield::Element<uint16_t, 16, 16>;
   using ValueInstr = Bitfield::Element<uint16_t, 32, 16>;
-  using ValueNeighbor = Bitfield::Element<uint16_t, 48, 16>;
+  using ValuePred = Bitfield::Element<uint8_t, 48, 8>;
+  using ValueSucc = Bitfield::Element<uint8_t, 56, 8>;
 
 public:
   explicit BlendedBlockHash() {}
@@ -154,7 +130,8 @@
     Offset = Bitfield::get<ValueOffset>(Hash);
     OpcodeHash = Bitfield::get<ValueOpcode>(Hash);
     InstrHash = Bitfield::get<ValueInstr>(Hash);
-    NeighborHash = Bitfield::get<ValueNeighbor>(Hash);
+    PredHash = Bitfield::get<ValuePred>(Hash);
+    SuccHash = Bitfield::get<ValueSucc>(Hash);
   }
 
   /// Combine the blended hash into uint64_t.
@@ -163,7 +140,8 @@
     Bitfield::set<ValueOffset>(Hash, Offset);
     Bitfield::set<ValueOpcode>(Hash, OpcodeHash);
     Bitfield::set<ValueInstr>(Hash, InstrHash);
-    Bitfield::set<ValueNeighbor>(Hash, NeighborHash);
+    Bitfield::set<ValuePred>(Hash, PredHash);
+    Bitfield::set<ValueSucc>(Hash, SuccHash);
     return Hash;
   }
 
@@ -175,7 +153,8 @@
            "incorrect blended hash distance computation");
     uint64_t Dist = 0;
     // Account for NeighborHash
-    Dist += NeighborHash == BBH.NeighborHash ? 0 : 1;
+    Dist += SuccHash == BBH.SuccHash ? 0 : 1;
+    Dist += PredHash == BBH.PredHash ? 0 : 1;
     Dist <<= 16;
     // Account for InstrHash
     Dist += InstrHash == BBH.InstrHash ? 0 : 1;
@@ -192,9 +171,10 @@
   /// (Strong) Hash of the basic block instructions, including opcodes and
   /// operands.
   uint16_t InstrHash{0};
-  /// Hash of the (loose) basic block together with (loose) hashes of its
-  /// successors and predecessors.
-  uint16_t NeighborHash{0};
+  /// (Loose) Hashes of the predecessors of the basic block.
+  uint8_t PredHash{0};
+  /// (Loose) Hashes of the successors of the basic block.
+  uint8_t SuccHash{0};
 };
 
 /// The object is used to identify and match basic blocks in a BinaryFunction
@@ -252,41 +232,43 @@
 
   std::vector<BlendedBlockHash> BlendedHashes(BasicBlocks.size());
   std::vector<uint64_t> OpcodeHashes(BasicBlocks.size());
-  // Initialize hash components
+  // Initialize hash components.
   for (size_t I = 0; I < BasicBlocks.size(); I++) {
     const BinaryBasicBlock *BB = BasicBlocks[I];
     assert(BB->getIndex() == I && "incorrect block index");
     BlendedHashes[I].Offset = BB->getOffset();
-    // Hashing complete instructions
+    // Hashing complete instructions.
     std::string InstrHashStr = hashBlock(
         BC, *BB, [&](const MCOperand &Op) { return hashInstOperand(BC, Op); });
     uint64_t InstrHash = std::hash<std::string>{}(InstrHashStr);
-    BlendedHashes[I].InstrHash = hash_64_to_16(InstrHash);
-    // Hashing opcodes
-    std::string OpcodeHashStr =
-        hashBlock(BC, *BB, [](const MCOperand &Op) { return std::string(); });
+    BlendedHashes[I].InstrHash = (uint16_t)hash_value(InstrHash);
+    // Hashing opcodes.
+    std::string OpcodeHashStr = hashBlockLoose(BC, *BB);
     OpcodeHashes[I] = std::hash<std::string>{}(OpcodeHashStr);
-    BlendedHashes[I].OpcodeHash = hash_64_to_16(OpcodeHashes[I]);
+    BlendedHashes[I].OpcodeHash = (uint16_t)hash_value(OpcodeHashes[I]);
   }
 
-  // Initialize neighbor hash
+  // Initialize neighbor hash.
   for (size_t I = 0; I < BasicBlocks.size(); I++) {
     const BinaryBasicBlock *BB = BasicBlocks[I];
-    uint64_t Hash = OpcodeHashes[I];
-    // Append hashes of successors
+    // Append hashes of successors.
+    uint64_t Hash = 0;
     for (BinaryBasicBlock *SuccBB : BB->successors()) {
       uint64_t SuccHash = OpcodeHashes[SuccBB->getIndex()];
       Hash = hashing::detail::hash_16_bytes(Hash, SuccHash);
     }
-    // Append hashes of predecessors
+    BlendedHashes[I].SuccHash = (uint8_t)hash_value(Hash);
+
+    // Append hashes of predecessors.
+    Hash = 0;
     for (BinaryBasicBlock *PredBB : BB->predecessors()) {
       uint64_t PredHash = OpcodeHashes[PredBB->getIndex()];
       Hash = hashing::detail::hash_16_bytes(Hash, PredHash);
     }
-    BlendedHashes[I].NeighborHash = hash_64_to_16(Hash);
+    BlendedHashes[I].PredHash = (uint8_t)hash_value(Hash);
   }
 
-  //  Assign hashes
+  //  Assign hashes.
   for (size_t I = 0; I < BasicBlocks.size(); I++) {
     const BinaryBasicBlock *BB = BasicBlocks[I];
     BB->setHash(BlendedHashes[I].combine());
@@ -409,20 +391,22 @@
     const FlowBlock *MatchedBlock = Matcher.matchBlock(YamlHash);
     if (MatchedBlock != nullptr) {
       MatchedBlocks[YamlBB.Index] = MatchedBlock;
-      LLVM_DEBUG(dbgs() << "Matched yaml block with bid = " << YamlBB.Index
-                        << " and hash = " << Twine::utohexstr(YamlBB.Hash)
-                        << " to BB with index = " << MatchedBlock->Index - 1
+      BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
+      LLVM_DEBUG(dbgs() << "Matched yaml block (bid = " << YamlBB.Index << ")"
+                        << " with hash " << Twine::utohexstr(YamlBB.Hash)
+                        << " to BB (index = " << MatchedBlock->Index - 1 << ")"
+                        << " with hash " << Twine::utohexstr(BinHash.combine())
                         << "\n");
       // Update matching stats accounting for the matched block.
-      BlendedBlockHash BinHash = BlendedHashes[MatchedBlock->Index - 1];
       if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
         ++BC.Stats.NumMatchedBlocks;
         BC.Stats.MatchedSampleCount += YamlBB.ExecCount;
+        LLVM_DEBUG(dbgs() << "  exact match\n");
       }
     } else {
       LLVM_DEBUG(
-          dbgs() << "Couldn't match yaml block with bid = " << YamlBB.Index
-                 << " and hash = " << Twine::utohexstr(YamlBB.Hash) << "\n");
+          dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index << ")"
+                 << " with hash " << Twine::utohexstr(YamlBB.Hash) << "\n");
     }
 
     // Update matching stats.
@@ -575,16 +559,15 @@
   Params.JoinIslands = opts::StaleMatchingJoinIslands;
 
   Params.CostBlockInc = opts::StaleMatchingCostBlockInc;
+  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockInc;
   Params.CostBlockDec = opts::StaleMatchingCostBlockDec;
-  Params.CostBlockEntryInc = opts::StaleMatchingCostBlockEntryInc;
-  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockEntryDec;
-  Params.CostBlockZeroInc = opts::StaleMatchingCostBlockZeroInc;
+  Params.CostBlockEntryDec = opts::StaleMatchingCostBlockDec;
   Params.CostBlockUnknownInc = opts::StaleMatchingCostBlockUnknownInc;
 
   Params.CostJumpInc = opts::StaleMatchingCostJumpInc;
-  Params.CostJumpFTInc = opts::StaleMatchingCostJumpFTInc;
+  Params.CostJumpFTInc = opts::StaleMatchingCostJumpInc;
   Params.CostJumpDec = opts::StaleMatchingCostJumpDec;
-  Params.CostJumpFTDec = opts::StaleMatchingCostJumpFTDec;
+  Params.CostJumpFTDec = opts::StaleMatchingCostJumpDec;
   Params.CostJumpUnknownInc = opts::StaleMatchingCostJumpUnknownInc;
   Params.CostJumpUnknownFTInc = opts::StaleMatchingCostJumpUnknownFTInc;
 
diff --git a/bolt/test/X86/Inputs/blarge_profile_stale.yaml b/bolt/test/X86/Inputs/blarge_profile_stale.yaml
--- a/bolt/test/X86/Inputs/blarge_profile_stale.yaml
+++ b/bolt/test/X86/Inputs/blarge_profile_stale.yaml
@@ -6,6 +6,7 @@
   profile-flags:   [ lbr ]
   profile-origin:  branch profile reader
   profile-events:  ''
+  dfs-order:       false
 functions:
   - name:            SolveCubic
     fid:             6
@@ -15,20 +16,24 @@
     blocks:
       - bid:             0
         insns:           43
-        hash:            0xD2411AC186118199
+        hash:            0xed4db287e71c0000
         exec:            151
-        succ:            [ { bid: 1, cnt: 4, mis: 2 }, { bid: 11, cnt: 0 } ]
+        succ:            [ { bid: 1, cnt: 151, mis: 2 }, { bid: 7, cnt: 0 } ]
       - bid:             1
         insns:           7
-        hash:            0xDF0C9CC1FEAA70C3
-        succ:            [ { bid: 10, cnt: 0 }, { bid: 2, cnt: 0 } ]
+        hash:            0x39330000e4560088
+        succ:            [ { bid: 13, cnt: 151 }, { bid: 2, cnt: 0 } ]
       - bid:             13
         insns:           26
-        hash:            0xF05DC5524E99E56F
-        succ:            [ { bid: 15, cnt: 89 }, { bid: 14, cnt: 0 } ]
-      - bid:             15
+        hash:            0xa9700000fe202a7
+        succ:            [ { bid: 3, cnt: 89 }, { bid: 2, cnt: 10 } ]
+      - bid:             3
+        insns:           9
+        hash:            0x62391dad18a700a0
+        succ:            [ { bid: 5, cnt: 151 } ]
+      - bid:             5
         insns:           9
-        hash:            0xB2E8338276A9834E
+        hash:            0x4d906d19ecec0111
   - name:            usqrt
     fid:             7
     hash:            0x8B62B1F9AD81EA35
@@ -37,15 +42,15 @@
     blocks:
       - bid:             0
         insns:           4
-        hash:            0xb1e5b76571270000
+        hash:            0x1111111111111111
         exec:            20
         succ:            [ { bid: 1, cnt: 0 } ]
       - bid:             1
         insns:           9
-        hash:            0x587e93788b970010
+        hash:            0x27e43a5e10cd0010
         succ:            [ { bid: 3, cnt: 320, mis: 171 }, { bid: 2, cnt: 0 } ]
       - bid:             3
         insns:           2
-        hash:            0x20e605d745e50039
+        hash:            0x4db935b6471e0039
         succ:            [ { bid: 1, cnt: 300, mis: 33 }, { bid: 4, cnt: 20 } ]
 ...
diff --git a/bolt/test/X86/reader-stale-yaml.test b/bolt/test/X86/reader-stale-yaml.test
--- a/bolt/test/X86/reader-stale-yaml.test
+++ b/bolt/test/X86/reader-stale-yaml.test
@@ -1,39 +1,71 @@
 # This script checks that YamlProfileReader in llvm-bolt is reading data
-# correctly and stale data is corrected.
+# correctly and stale data is corrected by profile inference.
 
 RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
+# Testing "usqrt"
 RUN: llvm-bolt %t.exe -o /dev/null --b %p/Inputs/blarge_profile_stale.yaml \
 RUN:   --print-cfg --print-only=usqrt --infer-stale-profile=1 \
-RUN:   --profile-ignore-hash=1 --profile-use-dfs 2>&1 | FileCheck %s
+RUN:   --profile-ignore-hash=1 --profile-use-dfs=0 2>&1 | FileCheck %s -check-prefix=CHECK1
+# Testing "SolveCubic"
+RUN: llvm-bolt %t.exe -o /dev/null --b %p/Inputs/blarge_profile_stale.yaml \
+RUN:   --print-cfg --print-only=SolveCubic --infer-stale-profile=1 \
+RUN:   --profile-ignore-hash=1 --profile-use-dfs=0 2>&1 | FileCheck %s -check-prefix=CHECK2
+
+# Function "usqrt" has stale profile, since the number of blocks in the profile
+# (nblocks=6) does not match the size of the CFG in the binary. The entry
+# block (bid=0) has an incorrect (missing) count, which should be inferred by
+# the algorithm.
 
 # Verify that yaml reader works as expected.
-CHECK:  pre-processing profile using YAML profile reader
+CHECK1:  pre-processing profile using YAML profile reader
+CHECK1:    Binary Function "usqrt" after building cfg {
+CHECK1:      State       : CFG constructed
+CHECK1:      Address     : 0x401170
+CHECK1:      Size        : 0x43
+CHECK1:      Section     : .text
+CHECK1:      IsSimple    : 1
+CHECK1:      BB Count    : 5
+CHECK1:      Exec Count  : 20
+CHECK1:      Branch Count: 640
+CHECK1:    }
+# Verify block counts.
+CHECK1:    .LBB01 (4 instructions, align : 1)
+CHECK1:      Successors: .Ltmp[[#BB13:]] (mispreds: 0, count: 20)
+CHECK1:    .Ltmp[[#BB13:]] (9 instructions, align : 1)
+CHECK1:      Successors: .Ltmp[[#BB12:]] (mispreds: 0, count: 320), .LFT[[#BB0:]] (mispreds: 0, count: 0)
+CHECK1:    .LFT[[#BB0:]] (2 instructions, align : 1)
+CHECK1:      Successors: .Ltmp[[#BB12:]] (mispreds: 0, count: 0)
+CHECK1:    .Ltmp[[#BB12:]] (2 instructions, align : 1)
+CHECK1:      Successors: .Ltmp[[#BB13:]] (mispreds: 0, count: 300), .LFT[[#BB1:]] (mispreds: 0, count: 20)
+CHECK1:    .LFT[[#BB1:]] (2 instructions, align : 1)
+# Check the overall inference stats.
+CHECK1:  2 out of 7 functions in the binary (28.6%) have non-empty execution profile
+CHECK1:  inferred profile for 2 (100.00% of profiled, 100.00% of stale) functions responsible for {{.*}} samples ({{.*}} out of {{.*}})
 
-# Verify the inferred counts of "usqrt" that has stale profile:
-#   - the function has nblocks=6 in the profile, which makes it stale
-#   - block with bid=0 has an incorrect (missing) count, which is inferred
-CHECK:    Binary Function "usqrt" after building cfg {
-CHECK:      State       : CFG constructed
-CHECK:      Address     : 0x401170
-CHECK:      Size        : 0x43
-CHECK:      Section     : .text
-CHECK:      IsSimple    : 1
-CHECK:      BB Count    : 5
-CHECK:      Exec Count  : 20
-CHECK:      Branch Count: 640
-CHECK:    }
 
-# Verify block counts.
-CHECK:    .LBB01 (4 instructions, align : 1)
-CHECK:      Successors: .Ltmp[[#BB13:]] (mispreds: 0, count: 20)
-CHECK:    .Ltmp[[#BB13:]] (9 instructions, align : 1)
-CHECK:      Successors: .Ltmp[[#BB12:]] (mispreds: 0, count: 320), .LFT[[#BB0:]] (mispreds: 0, count: 0)
-CHECK:    .LFT[[#BB0:]] (2 instructions, align : 1)
-CHECK:      Successors: .Ltmp[[#BB12:]] (mispreds: 0, count: 0)
-CHECK:    .Ltmp[[#BB12:]] (2 instructions, align : 1)
-CHECK:      Successors: .Ltmp[[#BB13:]] (mispreds: 0, count: 300), .LFT[[#BB1:]] (mispreds: 0, count: 20)
-CHECK:    .LFT[[#BB1:]] (2 instructions, align : 1)
+# Function "SolveCubic" has stale profile, since there is one jump in the
+# profile (from bid=13 to bid=2) which is not in the CFG in the binary. The test
+# verifies that the inference is able to match two blocks (bid=1 and bid=13)
+# using "loose" hashes and then correctly propagate the counts.
 
-# Check the overal inference stats.
-CHECK:  2 out of 7 functions in the binary (28.6%) have non-empty execution profile
-CHECK:  inferred profile for 1 (50.00% of profiled, 100.00% of stale) functions responsible for 87.31% samples (640 out of 733)
+CHECK2:  pre-processing profile using YAML profile reader
+CHECK2:    Binary Function "SolveCubic" after building cfg {
+CHECK2:      State       : CFG constructed
+CHECK2:      Address     : 0x400e00
+CHECK2:      Size        : 0x368
+CHECK2:      Section     : .text
+CHECK2:      IsSimple    : 1
+CHECK2:      BB Count    : 18
+CHECK2:      Exec Count  : 151
+CHECK2:      Branch Count: 552
+# Verify block counts.
+CHECK2:    .LBB00 (43 instructions, align : 1)
+CHECK2:      Successors: .Ltmp[[#BB7:]] (mispreds: 0, count: 0), .LFT[[#BB1:]] (mispreds: 0, count: 151)
+CHECK2:    .LFT[[#BB1:]] (5 instructions, align : 1)
+CHECK2:      Successors: .Ltmp[[#BB13:]] (mispreds: 0, count: 151), .LFT[[#BB2:]] (mispreds: 0, count: 0)
+CHECK2:    .Ltmp[[#BB3:]] (26 instructions, align : 1)
+CHECK2:      Successors: .Ltmp[[#BB5:]] (mispreds: 0, count: 151), .LFT[[#BB4:]] (mispreds: 0, count: 0)
+CHECK2:    .Ltmp[[#BB5:]] (9 instructions, align : 1)
+CHECK2:    .Ltmp[[#BB13:]] (12 instructions, align : 1)
+CHECK2:      Successors: .Ltmp[[#BB3:]] (mispreds: 0, count: 151)
+CHECK2:  2 out of 7 functions in the binary (28.6%) have non-empty execution profile