Index: test/tools/llvm-exegesis/X86/analysis-latency-instruction-chaining.test
===================================================================
--- /dev/null
+++ test/tools/llvm-exegesis/X86/analysis-latency-instruction-chaining.test
@@ -0,0 +1,59 @@
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.5 -analysis-inconsistency-epsilon=0.5 -analysis-numpoints=1 | FileCheck -check-prefixes=CHECK-ALL %s
+
+# CHECK-ALL: {{^}}cluster_id,opcode_name,config,sched_class,latency{{$}}
+
+# CHECK-ALL-NEXT: {{^}}0,
+# CHECK-ALL-SAME: ,1.00{{$}}
+# CHECK-ALL-NEXT: {{^}}0,
+# CHECK-ALL-SAME: ,1.00{{$}}
+
+# Instructions were executed serially, meaning that the next instruction
+# *ONLY* starts executing when the current instruction finishes.
+# Thus, the real latency of the first instruction is the per_snippet_value minus
+# the sum of latencies of all the other instructions in the snippet.
+
+# RCR8rCL has latency of 11. (the value from scheduling profile!)
+# Latency of whole snippet is 12 or 23. (not measured, hand-written.)
+# Thus, latency of BT32rr is 12-11 = 1, or 23-11-11 = 1
+
+---
+mode:            latency
+key:
+  instructions:
+    - 'BT32rr R11D R11D'
+    - 'RCR8rCL R11B R11B'
+  config:          ''
+  register_initial_values:
+    - 'R11D=0x0'
+    - 'R11B=0x0'
+    - 'CL=0x0'
+cpu_name:        bdver2
+llvm_triple:     x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+  - { key: latency, value: 0.0000, per_snippet_value: 12.0000 }
+error:           ''
+info:            Repeating two instructions
+assembled_snippet: 41BB0000000041B300B100450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DBC3
+...
+---
+mode:            latency
+key:
+  instructions:
+    - 'BT32rr R11D R11D'
+    - 'RCR8rCL R11B R11B'
+    - 'RCR8rCL R11B R11B'
+  config:          ''
+  register_initial_values:
+    - 'R11D=0x0'
+    - 'R11B=0x0'
+    - 'CL=0x0'
+cpu_name:        bdver2
+llvm_triple:     x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+  - { key: latency, value: 99.0000, per_snippet_value: 23.0000 }
+error:           ''
+info:            Repeating two instructions
+assembled_snippet: 41BB0000000041B300B100450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DB450FA3DB41D2DBC3
+...
Index: tools/llvm-exegesis/lib/Analysis.h
===================================================================
--- tools/llvm-exegesis/lib/Analysis.h
+++ tools/llvm-exegesis/lib/Analysis.h
@@ -37,6 +37,7 @@
 class Analysis {
 public:
   Analysis(const llvm::Target &Target,
+           std::unique_ptr<llvm::MCSubtargetInfo> SubtargetInfo,
            std::unique_ptr<llvm::MCInstrInfo> InstrInfo,
            const InstructionBenchmarkClustering &Clustering,
            double AnalysisInconsistencyEpsilon,
Index: tools/llvm-exegesis/lib/Analysis.cpp
===================================================================
--- tools/llvm-exegesis/lib/Analysis.cpp
+++ tools/llvm-exegesis/lib/Analysis.cpp
@@ -158,11 +158,13 @@
 }
 
 Analysis::Analysis(const llvm::Target &Target,
+                   std::unique_ptr<MCSubtargetInfo> SubtargetInfo,
                    std::unique_ptr<llvm::MCInstrInfo> InstrInfo,
                    const InstructionBenchmarkClustering &Clustering,
                    double AnalysisInconsistencyEpsilon,
                    bool AnalysisDisplayUnstableOpcodes)
-    : Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)),
+    : Clustering_(Clustering), SubtargetInfo_(std::move(SubtargetInfo)),
+      InstrInfo_(std::move(InstrInfo)),
       AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
                                            AnalysisInconsistencyEpsilon),
       AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
@@ -172,8 +174,6 @@
   const InstructionBenchmark &FirstPoint = Clustering.getPoints().front();
   RegInfo_.reset(Target.createMCRegInfo(FirstPoint.LLVMTriple));
   AsmInfo_.reset(Target.createMCAsmInfo(*RegInfo_, FirstPoint.LLVMTriple));
-  SubtargetInfo_.reset(Target.createMCSubtargetInfo(FirstPoint.LLVMTriple,
-                                                    FirstPoint.CpuName, ""));
   InstPrinter_.reset(Target.createMCInstPrinter(
       llvm::Triple(FirstPoint.LLVMTriple), 0 /*default variant*/, *AsmInfo_,
       *InstrInfo_, *RegInfo_));
Index: tools/llvm-exegesis/lib/CMakeLists.txt
===================================================================
--- tools/llvm-exegesis/lib/CMakeLists.txt
+++ tools/llvm-exegesis/lib/CMakeLists.txt
@@ -27,6 +27,7 @@
   LlvmState.cpp
   MCInstrDescView.cpp
   PerfHelper.cpp
+  PostProcessing.cpp
   RegisterAliasing.cpp
   RegisterValue.cpp
   SchedClassResolution.cpp
Index: tools/llvm-exegesis/lib/PostProcessing.h
===================================================================
--- /dev/null
+++ tools/llvm-exegesis/lib/PostProcessing.h
@@ -0,0 +1,33 @@
+//===-- PostProcessing.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Post-processing for the benchmark points.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_EXEGESIS_POSTPROCESSING_H
+#define LLVM_TOOLS_LLVM_EXEGESIS_POSTPROCESSING_H
+
+#include "BenchmarkResult.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+namespace exegesis {
+
+void PostProcessBenchmarkPoints(
+    const llvm::MCSubtargetInfo &SubtargetInfo,
+    const llvm::MCInstrInfo &InstrInfo,
+    llvm::MutableArrayRef<InstructionBenchmark> Points);
+
+} // namespace exegesis
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_EXEGESIS_POSTPROCESSING_H
Index: tools/llvm-exegesis/lib/PostProcessing.cpp
===================================================================
--- /dev/null
+++ tools/llvm-exegesis/lib/PostProcessing.cpp
@@ -0,0 +1,87 @@
+//===-- PostProcessing.cpp --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PostProcessing.h"
+#include "Clustering.h"
+#include "SchedClassResolution.h"
+#include "llvm/ADT/STLExtras.h"
+#include <utility>
+
+namespace llvm {
+namespace exegesis {
+
+static std::vector<BenchmarkMeasure>
+GetSchedDataAsPoint(const llvm::MCSubtargetInfo &SubtargetInfo,
+                    const llvm::MCInstrInfo &InstrInfo,
+                    const llvm::MCInst &Instr,
+                    const InstructionBenchmark &Point,
+                    const SchedClassClusterCentroid &Centroid) {
+  // 1. Resolve the sched class id of Instr (and whether it was a variant).
+  std::pair<unsigned /*SchedClassId*/, bool /*WasVariant*/> ID =
+      ResolvedSchedClass::resolveSchedClassId(SubtargetInfo, InstrInfo, Instr);
+
+  // 2. Build a ResolvedSchedClass from the resolved id and variant flag.
+  ResolvedSchedClass RSC(SubtargetInfo, ID.first, ID.second);
+
+  // 3. Convert the ResolvedSchedClass into a 'benchmark point' for Point.Mode.
+  // Centroid only supplies the stats argument; just the Keys are needed.
+  return RSC.getAsPoint(Point.Mode, SubtargetInfo, Centroid.getStats());
+}
+
+static void PostProcessPoint(const llvm::MCSubtargetInfo &SubtargetInfo,
+                             const llvm::MCInstrInfo &InstrInfo,
+                             InstructionBenchmark &Point) {
+  assert(Point.Key.Instructions.size() > 1 && "Should have more than 1 instr.");
+
+  // 1. Build a one-point centroid from this point's own measurements.
+  // It is used for validation here and is handed to GetSchedDataAsPoint().
+  SchedClassClusterCentroid Centroid;
+  Centroid.addPoint(Point.Measurements);
+  if (!Centroid.validate(Point.Mode)) // Ignore error points.
+    return;
+
+  // 2. Overwrite each per-instruction value with the per-snippet value.
+  // The benchmarking code blindly divided per-snippet value by the instr count.
+  llvm::for_each(Point.Measurements, [](BenchmarkMeasure &Measure) {
+    Measure.PerInstructionValue = Measure.PerSnippetValue;
+  });
+
+  // 3. And finally, subtract the SchedClass-specified values of the extra
+  // instructions (all but the first) from the measured values, thus leaving
+  // only the value that actually belongs to the first instruction.
+  for (const llvm::MCInst &Instr :
+       ArrayRef<llvm::MCInst>(Point.Key.Instructions).drop_front()) {
+    std::vector<BenchmarkMeasure> Measures =
+        GetSchedDataAsPoint(SubtargetInfo, InstrInfo, Instr, Point, Centroid);
+    if (Measures.empty()) // Ignore malformed benchmarks. This won't cause
+      return; // corruption because if this fails it will fail the first time.
+    assert(Point.Measurements.size() == Measures.size() &&
+           "Expected dimensions for measured and computed values to match.");
+    for (const auto &I : llvm::zip(Point.Measurements, Measures))
+      std::get<0>(I).PerInstructionValue -= std::get<1>(I).PerInstructionValue;
+  }
+}
+
+static bool ShouldPostProcess(const InstructionBenchmark &Point) {
+  // Only latency points that have extra instructions and at least one
+  // measurement are adjusted. Point is only inspected, hence the const ref.
+  return Point.Mode == InstructionBenchmark::ModeE::Latency &&
+         Point.Key.Instructions.size() > 1 && !Point.Measurements.empty();
+}
+
+void PostProcessBenchmarkPoints(
+    const llvm::MCSubtargetInfo &SubtargetInfo,
+    const llvm::MCInstrInfo &InstrInfo,
+    llvm::MutableArrayRef<InstructionBenchmark> Points) {
+  for (InstructionBenchmark &Point : Points)
+    if (ShouldPostProcess(Point)) // Leave every other point untouched.
+      PostProcessPoint(SubtargetInfo, InstrInfo, Point);
+}
+
+} // namespace exegesis
+} // namespace llvm
Index: tools/llvm-exegesis/llvm-exegesis.cpp
===================================================================
--- tools/llvm-exegesis/llvm-exegesis.cpp
+++ tools/llvm-exegesis/llvm-exegesis.cpp
@@ -17,6 +17,7 @@
 #include "lib/Clustering.h"
 #include "lib/LlvmState.h"
 #include "lib/PerfHelper.h"
+#include "lib/PostProcessing.h"
 #include "lib/Target.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
@@ -449,7 +450,7 @@
   llvm::InitializeNativeTargetDisassembler();
   // Read benchmarks.
   const LLVMState State("");
-  const std::vector<InstructionBenchmark> Points =
+  std::vector<InstructionBenchmark> Points =
       ExitOnErr(InstructionBenchmark::readYamls(State, BenchmarkFile));
   llvm::outs() << "Parsed " << Points.size() << " benchmark points\n";
   if (Points.empty()) {
@@ -467,15 +468,20 @@
     return;
   }
 
+  std::unique_ptr<llvm::MCSubtargetInfo> SubtargetInfo(
+      TheTarget->createMCSubtargetInfo(Points[0].LLVMTriple, Points[0].CpuName,
+                                       ""));
   std::unique_ptr<llvm::MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
 
+  PostProcessBenchmarkPoints(*SubtargetInfo, *InstrInfo, Points);
+
   const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
       Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
       AnalysisClusteringEpsilon, InstrInfo->getNumOpcodes()));
 
-  const Analysis Analyzer(*TheTarget, std::move(InstrInfo), Clustering,
-                          AnalysisInconsistencyEpsilon,
-                          AnalysisDisplayUnstableOpcodes);
+  const Analysis Analyzer(
+      *TheTarget, std::move(SubtargetInfo), std::move(InstrInfo), Clustering,
+      AnalysisInconsistencyEpsilon, AnalysisDisplayUnstableOpcodes);
 
   maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
                                             AnalysisClustersOutputFile);