Index: llvm/trunk/tools/llvm-mca/CMakeLists.txt =================================================================== --- llvm/trunk/tools/llvm-mca/CMakeLists.txt +++ llvm/trunk/tools/llvm-mca/CMakeLists.txt @@ -1,3 +1,5 @@ +include_directories(include) + set(LLVM_LINK_COMPONENTS AllTargetsAsmPrinters AllTargetsAsmParsers @@ -12,25 +14,7 @@ add_llvm_tool(llvm-mca llvm-mca.cpp CodeRegion.cpp - Context.cpp - DispatchStage.cpp - ExecuteStage.cpp - FetchStage.cpp - HWEventListener.cpp - HardwareUnit.cpp - InstrBuilder.cpp - Instruction.cpp - InstructionTables.cpp - LSUnit.cpp - Pipeline.cpp PipelinePrinter.cpp - RegisterFile.cpp - ResourceManager.cpp - RetireControlUnit.cpp - RetireStage.cpp - Scheduler.cpp - Stage.cpp - Support.cpp Views/DispatchStatistics.cpp Views/InstructionInfoView.cpp Views/RegisterFileStatistics.cpp @@ -41,3 +25,7 @@ Views/TimelineView.cpp Views/View.cpp ) + +set(LLVM_MCA_SOURCE_DIR ${CURRENT_SOURCE_DIR}) +add_subdirectory(lib) +target_link_libraries(llvm-mca PRIVATE LLVMMCA) Index: llvm/trunk/tools/llvm-mca/Context.h =================================================================== --- llvm/trunk/tools/llvm-mca/Context.h +++ llvm/trunk/tools/llvm-mca/Context.h @@ -1,68 +0,0 @@ -//===---------------------------- Context.h ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a class for holding ownership of various simulated -/// hardware units. A Context also provides a utility routine for constructing -/// a default out-of-order pipeline with fetch, dispatch, execute, and retire -/// stages. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_CONTEXT_H -#define LLVM_TOOLS_LLVM_MCA_CONTEXT_H -#include "HardwareUnit.h" -#include "InstrBuilder.h" -#include "Pipeline.h" -#include "SourceMgr.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSchedule.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include - -namespace mca { - -/// This is a convenience struct to hold the parameters necessary for creating -/// the pre-built "default" out-of-order pipeline. -struct PipelineOptions { - PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS, - bool NoAlias) - : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS), - StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {} - unsigned DispatchWidth; - unsigned RegisterFileSize; - unsigned LoadQueueSize; - unsigned StoreQueueSize; - bool AssumeNoAlias; -}; - -class Context { - llvm::SmallVector, 4> Hardware; - const llvm::MCRegisterInfo &MRI; - const llvm::MCSubtargetInfo &STI; - -public: - Context(const llvm::MCRegisterInfo &R, const llvm::MCSubtargetInfo &S) - : MRI(R), STI(S) {} - Context(const Context &C) = delete; - Context &operator=(const Context &C) = delete; - - void addHardwareUnit(std::unique_ptr H) { - Hardware.push_back(std::move(H)); - } - - /// Construct a basic pipeline for simulating an out-of-order pipeline. - /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages. 
- std::unique_ptr createDefaultPipeline(const PipelineOptions &Opts, - InstrBuilder &IB, - SourceMgr &SrcMgr); -}; - -} // namespace mca -#endif // LLVM_TOOLS_LLVM_MCA_CONTEXT_H Index: llvm/trunk/tools/llvm-mca/Context.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Context.cpp +++ llvm/trunk/tools/llvm-mca/Context.cpp @@ -1,65 +0,0 @@ -//===---------------------------- Context.cpp -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a class for holding ownership of various simulated -/// hardware units. A Context also provides a utility routine for constructing -/// a default out-of-order pipeline with fetch, dispatch, execute, and retire -/// stages. -/// -//===----------------------------------------------------------------------===// - -#include "Context.h" -#include "DispatchStage.h" -#include "ExecuteStage.h" -#include "FetchStage.h" -#include "RegisterFile.h" -#include "RetireControlUnit.h" -#include "RetireStage.h" -#include "Scheduler.h" - -namespace mca { - -using namespace llvm; - -std::unique_ptr -Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, - SourceMgr &SrcMgr) { - const MCSchedModel &SM = STI.getSchedModel(); - - // Create the hardware units defining the backend. - auto RCU = llvm::make_unique(SM); - auto PRF = llvm::make_unique(SM, MRI, Opts.RegisterFileSize); - auto LSU = llvm::make_unique(Opts.LoadQueueSize, Opts.StoreQueueSize, - Opts.AssumeNoAlias); - auto HWS = llvm::make_unique(SM, LSU.get()); - - // Create the pipeline and its stages. 
- auto StagePipeline = llvm::make_unique(); - auto Fetch = llvm::make_unique(IB, SrcMgr); - auto Dispatch = llvm::make_unique( - STI, MRI, Opts.RegisterFileSize, Opts.DispatchWidth, *RCU, *PRF); - auto Execute = llvm::make_unique(*HWS); - auto Retire = llvm::make_unique(*RCU, *PRF); - - // Pass the ownership of all the hardware units to this Context. - addHardwareUnit(std::move(RCU)); - addHardwareUnit(std::move(PRF)); - addHardwareUnit(std::move(LSU)); - addHardwareUnit(std::move(HWS)); - - // Build the pipeline. - StagePipeline->appendStage(std::move(Fetch)); - StagePipeline->appendStage(std::move(Dispatch)); - StagePipeline->appendStage(std::move(Execute)); - StagePipeline->appendStage(std::move(Retire)); - return StagePipeline; -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/DispatchStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/DispatchStage.h +++ llvm/trunk/tools/llvm-mca/DispatchStage.h @@ -1,95 +0,0 @@ -//===----------------------- DispatchStage.h --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file models the dispatch component of an instruction pipeline. -/// -/// The DispatchStage is responsible for updating instruction dependencies -/// and communicating to the simulated instruction scheduler that an instruction -/// is ready to be scheduled for execution. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H - -#include "HWEventListener.h" -#include "Instruction.h" -#include "RegisterFile.h" -#include "RetireControlUnit.h" -#include "Stage.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace mca { - -// Implements the hardware dispatch logic. -// -// This class is responsible for the dispatch stage, in which instructions are -// dispatched in groups to the Scheduler. An instruction can be dispatched if -// the following conditions are met: -// 1) There are enough entries in the reorder buffer (see class -// RetireControlUnit) to write the opcodes associated with the instruction. -// 2) There are enough physical registers to rename output register operands. -// 3) There are enough entries available in the used buffered resource(s). -// -// The number of micro opcodes that can be dispatched in one cycle is limited by -// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when -// processor resources are not available. Dispatch stall events are counted -// during the entire execution of the code, and displayed by the performance -// report when flag '-dispatch-stats' is specified. -// -// If the number of micro opcodes exceedes DispatchWidth, then the instruction -// is dispatched in multiple cycles. 
-class DispatchStage final : public Stage { - unsigned DispatchWidth; - unsigned AvailableEntries; - unsigned CarryOver; - const llvm::MCSubtargetInfo &STI; - RetireControlUnit &RCU; - RegisterFile &PRF; - - bool checkRCU(const InstRef &IR) const; - bool checkPRF(const InstRef &IR) const; - bool canDispatch(const InstRef &IR) const; - llvm::Error dispatch(InstRef IR); - - void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI); - - void notifyInstructionDispatched(const InstRef &IR, - llvm::ArrayRef UsedPhysRegs); - - void collectWrites(llvm::SmallVectorImpl &Vec, - unsigned RegID) const { - return PRF.collectWrites(Vec, RegID); - } - -public: - DispatchStage(const llvm::MCSubtargetInfo &Subtarget, - const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize, - unsigned MaxDispatchWidth, RetireControlUnit &R, - RegisterFile &F) - : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), - CarryOver(0U), STI(Subtarget), RCU(R), PRF(F) {} - - bool isAvailable(const InstRef &IR) const override; - - // The dispatch logic internally doesn't buffer instructions. So there is - // never work to do at the beginning of every cycle. - bool hasWorkToComplete() const override { return false; } - llvm::Error cycleStart() override; - llvm::Error execute(InstRef &IR) override; - -#ifndef NDEBUG - void dump() const; -#endif -}; -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/DispatchStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/DispatchStage.cpp +++ llvm/trunk/tools/llvm-mca/DispatchStage.cpp @@ -1,160 +0,0 @@ -//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file models the dispatch component of an instruction pipeline. -/// -/// The DispatchStage is responsible for updating instruction dependencies -/// and communicating to the simulated instruction scheduler that an instruction -/// is ready to be scheduled for execution. -/// -//===----------------------------------------------------------------------===// - -#include "DispatchStage.h" -#include "HWEventListener.h" -#include "Scheduler.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -void DispatchStage::notifyInstructionDispatched(const InstRef &IR, - ArrayRef UsedRegs) { - LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); - notifyEvent(HWInstructionDispatchedEvent(IR, UsedRegs)); -} - -bool DispatchStage::checkPRF(const InstRef &IR) const { - SmallVector RegDefs; - for (const std::unique_ptr &RegDef : - IR.getInstruction()->getDefs()) - RegDefs.emplace_back(RegDef->getRegisterID()); - - const unsigned RegisterMask = PRF.isAvailable(RegDefs); - // A mask with all zeroes means: register files are available. 
- if (RegisterMask) { - notifyEvent( - HWStallEvent(HWStallEvent::RegisterFileStall, IR)); - return false; - } - - return true; -} - -bool DispatchStage::checkRCU(const InstRef &IR) const { - const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; - if (RCU.isAvailable(NumMicroOps)) - return true; - notifyEvent( - HWStallEvent(HWStallEvent::RetireControlUnitStall, IR)); - return false; -} - -bool DispatchStage::canDispatch(const InstRef &IR) const { - return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR); -} - -void DispatchStage::updateRAWDependencies(ReadState &RS, - const MCSubtargetInfo &STI) { - SmallVector DependentWrites; - - collectWrites(DependentWrites, RS.getRegisterID()); - RS.setDependentWrites(DependentWrites.size()); - // We know that this read depends on all the writes in DependentWrites. - // For each write, check if we have ReadAdvance information, and use it - // to figure out in how many cycles this read becomes available. - const ReadDescriptor &RD = RS.getDescriptor(); - const MCSchedModel &SM = STI.getSchedModel(); - const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); - for (WriteRef &WR : DependentWrites) { - WriteState &WS = *WR.getWriteState(); - unsigned WriteResID = WS.getWriteResourceID(); - int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); - WS.addUser(&RS, ReadAdvance); - } -} - -llvm::Error DispatchStage::dispatch(InstRef IR) { - assert(!CarryOver && "Cannot dispatch another instruction!"); - Instruction &IS = *IR.getInstruction(); - const InstrDesc &Desc = IS.getDesc(); - const unsigned NumMicroOps = Desc.NumMicroOps; - if (NumMicroOps > DispatchWidth) { - assert(AvailableEntries == DispatchWidth); - AvailableEntries = 0; - CarryOver = NumMicroOps - DispatchWidth; - } else { - assert(AvailableEntries >= NumMicroOps); - AvailableEntries -= NumMicroOps; - } - - // A dependency-breaking instruction doesn't have to wait on the register - // input operands, and it is often 
optimized at register renaming stage. - // Update RAW dependencies if this instruction is not a dependency-breaking - // instruction. A dependency-breaking instruction is a zero-latency - // instruction that doesn't consume hardware resources. - // An example of dependency-breaking instruction on X86 is a zero-idiom XOR. - bool IsDependencyBreaking = IS.isDependencyBreaking(); - for (std::unique_ptr &RS : IS.getUses()) - if (RS->isImplicitRead() || !IsDependencyBreaking) - updateRAWDependencies(*RS, STI); - - // By default, a dependency-breaking zero-latency instruction is expected to - // be optimized at register renaming stage. That means, no physical register - // is allocated to the instruction. - bool ShouldAllocateRegisters = - !(Desc.isZeroLatency() && IsDependencyBreaking); - SmallVector RegisterFiles(PRF.getNumRegisterFiles()); - for (std::unique_ptr &WS : IS.getDefs()) { - PRF.addRegisterWrite(WriteRef(IR.first, WS.get()), RegisterFiles, - ShouldAllocateRegisters); - } - - // Reserve slots in the RCU, and notify the instruction that it has been - // dispatched to the schedulers for execution. - IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); - - // Notify listeners of the "instruction dispatched" event, - // and move IR to the next stage. - notifyInstructionDispatched(IR, RegisterFiles); - return moveToTheNextStage(IR); -} - -llvm::Error DispatchStage::cycleStart() { - AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; - CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U; - return llvm::ErrorSuccess(); -} - -bool DispatchStage::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); - if (Required > AvailableEntries) - return false; - // The dispatch logic doesn't internally buffer instructions. 
It only accepts - // instructions that can be successfully moved to the next stage during this - // same cycle. - return canDispatch(IR); -} - -llvm::Error DispatchStage::execute(InstRef &IR) { - assert(canDispatch(IR) && "Cannot dispatch another instruction!"); - return dispatch(IR); -} - -#ifndef NDEBUG -void DispatchStage::dump() const { - PRF.dump(); - RCU.dump(); -} -#endif -} // namespace mca Index: llvm/trunk/tools/llvm-mca/ExecuteStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/ExecuteStage.h +++ llvm/trunk/tools/llvm-mca/ExecuteStage.h @@ -1,78 +0,0 @@ -//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the execution stage of a default instruction pipeline. -/// -/// The ExecuteStage is responsible for managing the hardware scheduler -/// and issuing notifications that an instruction has been executed. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H - -#include "Instruction.h" -#include "Scheduler.h" -#include "Stage.h" -#include "llvm/ADT/ArrayRef.h" - -namespace mca { - -class ExecuteStage final : public Stage { - Scheduler &HWS; - - llvm::Error issueInstruction(InstRef &IR); - - // Called at the beginning of each cycle to issue already dispatched - // instructions to the underlying pipelines. 
- llvm::Error issueReadyInstructions(); - - ExecuteStage(const ExecuteStage &Other) = delete; - ExecuteStage &operator=(const ExecuteStage &Other) = delete; - -public: - ExecuteStage(Scheduler &S) : Stage(), HWS(S) {} - - // This stage works under the assumption that the Pipeline will eventually - // execute a retire stage. We don't need to check if pipelines and/or - // schedulers have instructions to process, because those instructions are - // also tracked by the retire control unit. That means, - // RetireControlUnit::hasWorkToComplete() is responsible for checking if there - // are still instructions in-flight in the out-of-order backend. - bool hasWorkToComplete() const override { return false; } - bool isAvailable(const InstRef &IR) const override; - - // Notifies the scheduler that a new cycle just started. - // - // This method notifies the scheduler that a new cycle started. - // This method is also responsible for notifying listeners about instructions - // state changes, and processor resources freed by the scheduler. - // Instructions that transitioned to the 'Executed' state are automatically - // moved to the next stage (i.e. RetireStage). - llvm::Error cycleStart() override; - llvm::Error execute(InstRef &IR) override; - - void - notifyInstructionIssued(const InstRef &IR, - llvm::ArrayRef> Used); - void notifyInstructionExecuted(const InstRef &IR); - void notifyInstructionReady(const InstRef &IR); - void notifyResourceAvailable(const ResourceRef &RR); - - // Notify listeners that buffered resources were consumed. - void notifyReservedBuffers(llvm::ArrayRef Buffers); - - // Notify listeners that buffered resources were freed. 
- void notifyReleasedBuffers(llvm::ArrayRef Buffers); -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H Index: llvm/trunk/tools/llvm-mca/ExecuteStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/ExecuteStage.cpp +++ llvm/trunk/tools/llvm-mca/ExecuteStage.cpp @@ -1,195 +0,0 @@ -//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the execution stage of an instruction pipeline. -/// -/// The ExecuteStage is responsible for managing the hardware scheduler -/// and issuing notifications that an instruction has been executed. -/// -//===----------------------------------------------------------------------===// - -#include "ExecuteStage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -using namespace llvm; - -HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) { - switch (Status) { - case Scheduler::SC_LOAD_QUEUE_FULL: - return HWStallEvent::LoadQueueFull; - case Scheduler::SC_STORE_QUEUE_FULL: - return HWStallEvent::StoreQueueFull; - case Scheduler::SC_BUFFERS_FULL: - return HWStallEvent::SchedulerQueueFull; - case Scheduler::SC_DISPATCH_GROUP_STALL: - return HWStallEvent::DispatchGroupStall; - case Scheduler::SC_AVAILABLE: - return HWStallEvent::Invalid; - } - - llvm_unreachable("Don't know how to process this StallKind!"); -} - -bool ExecuteStage::isAvailable(const InstRef &IR) const { - if (Scheduler::Status S = HWS.isAvailable(IR)) { - HWStallEvent::GenericEventType ET = toHWStallEventType(S); - notifyEvent(HWStallEvent(ET, IR)); - return false; - } - - return true; -} - -Error 
ExecuteStage::issueInstruction(InstRef &IR) { - SmallVector, 4> Used; - SmallVector Ready; - HWS.issueInstruction(IR, Used, Ready); - - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - notifyReleasedBuffers(Desc.Buffers); - notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) { - notifyInstructionExecuted(IR); - //FIXME: add a buffer of executed instructions. - if (Error S = moveToTheNextStage(IR)) - return S; - } - - for (const InstRef &I : Ready) - notifyInstructionReady(I); - return ErrorSuccess(); -} - -Error ExecuteStage::issueReadyInstructions() { - InstRef IR = HWS.select(); - while (IR.isValid()) { - if (Error Err = issueInstruction(IR)) - return Err; - - // Select the next instruction to issue. - IR = HWS.select(); - } - - return ErrorSuccess(); -} - -Error ExecuteStage::cycleStart() { - llvm::SmallVector Freed; - llvm::SmallVector Executed; - llvm::SmallVector Ready; - - HWS.cycleEvent(Freed, Executed, Ready); - - for (const ResourceRef &RR : Freed) - notifyResourceAvailable(RR); - - for (InstRef &IR : Executed) { - notifyInstructionExecuted(IR); - //FIXME: add a buffer of executed instructions. - if (Error S = moveToTheNextStage(IR)) - return S; - } - - for (const InstRef &IR : Ready) - notifyInstructionReady(IR); - - return issueReadyInstructions(); -} - -// Schedule the instruction for execution on the hardware. -Error ExecuteStage::execute(InstRef &IR) { - assert(isAvailable(IR) && "Scheduler is not available!"); - -#ifndef NDEBUG - // Ensure that the HWS has not stored this instruction in its queues. - HWS.sanityCheck(IR); -#endif - // Reserve a slot in each buffered resource. Also, mark units with - // BufferSize=0 as reserved. Resources with a buffer size of zero will only - // be released after MCIS is issued, and all the ResourceCycles for those - // units have been consumed. 
- const InstrDesc &Desc = IR.getInstruction()->getDesc(); - HWS.dispatch(IR); - notifyReservedBuffers(Desc.Buffers); - if (!HWS.isReady(IR)) - return ErrorSuccess(); - - // If we did not return early, then the scheduler is ready for execution. - notifyInstructionReady(IR); - - // If we cannot issue immediately, the HWS will add IR to its ready queue for - // execution later, so we must return early here. - if (!HWS.mustIssueImmediately(IR)) - return ErrorSuccess(); - - // Issue IR to the underlying pipelines. - return issueInstruction(IR); -} - -void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) { - LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); - notifyEvent( - HWInstructionEvent(HWInstructionEvent::Executed, IR)); -} - -void ExecuteStage::notifyInstructionReady(const InstRef &IR) { - LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n'); - notifyEvent( - HWInstructionEvent(HWInstructionEvent::Ready, IR)); -} - -void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) { - LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' - << RR.second << "]\n"); - for (HWEventListener *Listener : getListeners()) - Listener->onResourceAvailable(RR); -} - -void ExecuteStage::notifyInstructionIssued( - const InstRef &IR, ArrayRef> Used) { - LLVM_DEBUG({ - dbgs() << "[E] Instruction Issued: #" << IR << '\n'; - for (const std::pair &Resource : Used) { - dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' 
- << Resource.first.second << "], "; - dbgs() << "cycles: " << Resource.second << '\n'; - } - }); - notifyEvent(HWInstructionIssuedEvent(IR, Used)); -} - -void ExecuteStage::notifyReservedBuffers(ArrayRef Buffers) { - if (Buffers.empty()) - return; - - SmallVector BufferIDs(Buffers.begin(), Buffers.end()); - std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); - for (HWEventListener *Listener : getListeners()) - Listener->onReservedBuffers(BufferIDs); -} - -void ExecuteStage::notifyReleasedBuffers(ArrayRef Buffers) { - if (Buffers.empty()) - return; - - SmallVector BufferIDs(Buffers.begin(), Buffers.end()); - std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(), - [&](uint64_t Op) { return HWS.getResourceID(Op); }); - for (HWEventListener *Listener : getListeners()) - Listener->onReleasedBuffers(BufferIDs); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/FetchStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/FetchStage.h +++ llvm/trunk/tools/llvm-mca/FetchStage.h @@ -1,52 +0,0 @@ -//===---------------------- FetchStage.h ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the Fetch stage of an instruction pipeline. Its sole -/// purpose in life is to produce instructions for the rest of the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H - -#include "InstrBuilder.h" -#include "SourceMgr.h" -#include "Stage.h" -#include - -namespace mca { - -class FetchStage final : public Stage { - std::unique_ptr CurrentInstruction; - using InstMap = std::map>; - InstMap Instructions; - InstrBuilder &IB; - SourceMgr &SM; - - // Updates the program counter, and sets 'CurrentInstruction'. - llvm::Error getNextInstruction(); - - FetchStage(const FetchStage &Other) = delete; - FetchStage &operator=(const FetchStage &Other) = delete; - -public: - FetchStage(InstrBuilder &IB, SourceMgr &SM) - : CurrentInstruction(), IB(IB), SM(SM) {} - - bool isAvailable(const InstRef &IR) const override; - bool hasWorkToComplete() const override; - llvm::Error execute(InstRef &IR) override; - llvm::Error cycleStart() override; - llvm::Error cycleEnd() override; -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/FetchStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/FetchStage.cpp +++ llvm/trunk/tools/llvm-mca/FetchStage.cpp @@ -1,82 +0,0 @@ -//===---------------------- FetchStage.cpp ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the Fetch stage of an instruction pipeline. Its sole -/// purpose in life is to produce instructions for the rest of the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#include "FetchStage.h" - -namespace mca { - -bool FetchStage::hasWorkToComplete() const { - return CurrentInstruction.get() || SM.hasNext(); -} - -bool FetchStage::isAvailable(const InstRef & /* unused */) const { - if (!CurrentInstruction) - return false; - assert(SM.hasNext() && "Unexpected internal state!"); - const SourceRef SR = SM.peekNext(); - InstRef IR(SR.first, CurrentInstruction.get()); - return checkNextStage(IR); -} - -llvm::Error FetchStage::getNextInstruction() { - assert(!CurrentInstruction && "There is already an instruction to process!"); - if (!SM.hasNext()) - return llvm::ErrorSuccess(); - const SourceRef SR = SM.peekNext(); - llvm::Expected> InstOrErr = - IB.createInstruction(*SR.second); - if (!InstOrErr) - return InstOrErr.takeError(); - CurrentInstruction = std::move(InstOrErr.get()); - return llvm::ErrorSuccess(); -} - -llvm::Error FetchStage::execute(InstRef & /*unused */) { - assert(CurrentInstruction && "There is no instruction to process!"); - const SourceRef SR = SM.peekNext(); - InstRef IR(SR.first, CurrentInstruction.get()); - assert(checkNextStage(IR) && "Invalid fetch!"); - - Instructions[IR.getSourceIndex()] = std::move(CurrentInstruction); - if (llvm::Error Val = moveToTheNextStage(IR)) - return Val; - - SM.updateNext(); - - // Move the program counter. - return getNextInstruction(); -} - -llvm::Error FetchStage::cycleStart() { - if (!CurrentInstruction && SM.hasNext()) - return getNextInstruction(); - return llvm::ErrorSuccess(); -} - -llvm::Error FetchStage::cycleEnd() { - // Find the first instruction which hasn't been retired. - const InstMap::iterator It = - llvm::find_if(Instructions, [](const InstMap::value_type &KeyValuePair) { - return !KeyValuePair.second->isRetired(); - }); - - // Erase instructions up to the first that hasn't been retired. 
- if (It != Instructions.begin()) - Instructions.erase(Instructions.begin(), It); - - return llvm::ErrorSuccess(); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/HWEventListener.h =================================================================== --- llvm/trunk/tools/llvm-mca/HWEventListener.h +++ llvm/trunk/tools/llvm-mca/HWEventListener.h @@ -1,141 +0,0 @@ -//===----------------------- HWEventListener.h ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the main interface for hardware event listeners. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H -#define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H - -#include "Instruction.h" -#include "llvm/ADT/ArrayRef.h" -#include - -namespace mca { - -// An HWInstructionEvent represents state changes of instructions that -// listeners might be interested in. Listeners can choose to ignore any event -// they are not interested in. -class HWInstructionEvent { -public: - // This is the list of event types that are shared by all targets, that - // generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent, - // ...) and generic Views can manipulate. - // Subtargets are free to define additional event types, that are goin to be - // handled by generic components as opaque values, but can still be - // emitted by subtarget-specific pipeline stages (e.g., ExecuteStage, - // DispatchStage, ...) and interpreted by subtarget-specific EventListener - // implementations. - enum GenericEventType { - Invalid = 0, - // Events generated by the Retire Control Unit. - Retired, - // Events generated by the Scheduler. 
- Ready, - Issued, - Executed, - // Events generated by the Dispatch logic. - Dispatched, - - LastGenericEventType, - }; - - HWInstructionEvent(unsigned type, const InstRef &Inst) - : Type(type), IR(Inst) {} - - // The event type. The exact meaning depends on the subtarget. - const unsigned Type; - - // The instruction this event was generated for. - const InstRef &IR; -}; - -class HWInstructionIssuedEvent : public HWInstructionEvent { -public: - using ResourceRef = std::pair; - HWInstructionIssuedEvent(const InstRef &IR, - llvm::ArrayRef> UR) - : HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {} - - llvm::ArrayRef> UsedResources; -}; - -class HWInstructionDispatchedEvent : public HWInstructionEvent { -public: - HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef Regs) - : HWInstructionEvent(HWInstructionEvent::Dispatched, IR), - UsedPhysRegs(Regs) {} - // Number of physical register allocated for this instruction. There is one - // entry per register file. - llvm::ArrayRef UsedPhysRegs; -}; - -class HWInstructionRetiredEvent : public HWInstructionEvent { -public: - HWInstructionRetiredEvent(const InstRef &IR, llvm::ArrayRef Regs) - : HWInstructionEvent(HWInstructionEvent::Retired, IR), - FreedPhysRegs(Regs) {} - // Number of register writes that have been architecturally committed. There - // is one entry per register file. - llvm::ArrayRef FreedPhysRegs; -}; - -// A HWStallEvent represents a pipeline stall caused by the lack of hardware -// resources. -class HWStallEvent { -public: - enum GenericEventType { - Invalid = 0, - // Generic stall events generated by the DispatchStage. - RegisterFileStall, - RetireControlUnitStall, - // Generic stall events generated by the Scheduler. - DispatchGroupStall, - SchedulerQueueFull, - LoadQueueFull, - StoreQueueFull, - LastGenericEvent - }; - - HWStallEvent(unsigned type, const InstRef &Inst) : Type(type), IR(Inst) {} - - // The exact meaning of the stall event type depends on the subtarget. 
- const unsigned Type; - - // The instruction this event was generated for. - const InstRef &IR; -}; - -class HWEventListener { -public: - // Generic events generated by the pipeline. - virtual void onCycleBegin() {} - virtual void onCycleEnd() {} - - virtual void onEvent(const HWInstructionEvent &Event) {} - virtual void onEvent(const HWStallEvent &Event) {} - - using ResourceRef = std::pair; - virtual void onResourceAvailable(const ResourceRef &RRef) {} - - // Events generated by the Scheduler when buffered resources are - // consumed/freed. - virtual void onReservedBuffers(llvm::ArrayRef Buffers) {} - virtual void onReleasedBuffers(llvm::ArrayRef Buffers) {} - - virtual ~HWEventListener() {} - -private: - virtual void anchor(); -}; -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/HWEventListener.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/HWEventListener.cpp +++ llvm/trunk/tools/llvm-mca/HWEventListener.cpp @@ -1,21 +0,0 @@ -//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a vtable anchor for class HWEventListener. -/// -//===----------------------------------------------------------------------===// - -#include "HWEventListener.h" - -namespace mca { - -// Anchor the vtable here. 
-void HWEventListener::anchor() {} -} // namespace mca Index: llvm/trunk/tools/llvm-mca/HardwareUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/HardwareUnit.h +++ llvm/trunk/tools/llvm-mca/HardwareUnit.h @@ -1,31 +0,0 @@ -//===-------------------------- HardwareUnit.h ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a base class for describing a simulated hardware -/// unit. These units are used to construct a simulated backend. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H -#define LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H - -namespace mca { - -class HardwareUnit { - HardwareUnit(const HardwareUnit &H) = delete; - HardwareUnit &operator=(const HardwareUnit &H) = delete; - -public: - HardwareUnit() = default; - virtual ~HardwareUnit(); -}; - -} // namespace mca -#endif // LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H Index: llvm/trunk/tools/llvm-mca/HardwareUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/HardwareUnit.cpp +++ llvm/trunk/tools/llvm-mca/HardwareUnit.cpp @@ -1,23 +0,0 @@ -//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the anchor for the base class that describes -/// simulated hardware units. 
-/// -//===----------------------------------------------------------------------===// - -#include "HardwareUnit.h" - -namespace mca { - -// Pin the vtable with this method. -HardwareUnit::~HardwareUnit() = default; - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/InstrBuilder.h =================================================================== --- llvm/trunk/tools/llvm-mca/InstrBuilder.h +++ llvm/trunk/tools/llvm-mca/InstrBuilder.h @@ -1,90 +0,0 @@ -//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A builder class for instructions that are statically analyzed by llvm-mca. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H -#define LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H - -#include "Instruction.h" -#include "Support.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Error.h" - -namespace mca { - -class DispatchUnit; - -/// A builder class that knows how to construct Instruction objects. -/// -/// Every llvm-mca Instruction is described by an object of class InstrDesc. -/// An InstrDesc describes which registers are read/written by the instruction, -/// as well as the instruction latency and hardware resources consumed. -/// -/// This class is used by the tool to construct Instructions and instruction -/// descriptors (i.e. InstrDesc objects). -/// Information from the machine scheduling model is used to identify processor -/// resources that are consumed by an instruction. 
-class InstrBuilder { - const llvm::MCSubtargetInfo &STI; - const llvm::MCInstrInfo &MCII; - const llvm::MCRegisterInfo &MRI; - const llvm::MCInstrAnalysis &MCIA; - llvm::MCInstPrinter &MCIP; - llvm::SmallVector ProcResourceMasks; - - llvm::DenseMap> Descriptors; - llvm::DenseMap> - VariantDescriptors; - - llvm::Expected - createInstrDescImpl(const llvm::MCInst &MCI); - llvm::Expected - getOrCreateInstrDesc(const llvm::MCInst &MCI); - - InstrBuilder(const InstrBuilder &) = delete; - InstrBuilder &operator=(const InstrBuilder &) = delete; - - llvm::Error populateWrites(InstrDesc &ID, const llvm::MCInst &MCI, - unsigned SchedClassID); - llvm::Error populateReads(InstrDesc &ID, const llvm::MCInst &MCI, - unsigned SchedClassID); - -public: - InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii, - const llvm::MCRegisterInfo &mri, - const llvm::MCInstrAnalysis &mcia, llvm::MCInstPrinter &mcip) - : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), MCIP(mcip), - ProcResourceMasks(STI.getSchedModel().getNumProcResourceKinds()) { - computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); - } - - // Returns an array of processor resource masks. - // Masks are computed by function mca::computeProcResourceMasks. see - // Support.h for a description of how masks are computed and how masks can be - // used to solve set membership problems. 
- llvm::ArrayRef getProcResourceMasks() const { - return ProcResourceMasks; - } - - void clear() { VariantDescriptors.shrink_and_clear(); } - - llvm::Expected> - createInstruction(const llvm::MCInst &MCI); -}; -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/InstrBuilder.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/InstrBuilder.cpp +++ llvm/trunk/tools/llvm-mca/InstrBuilder.cpp @@ -1,485 +0,0 @@ -//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements the InstrBuilder interface. -/// -//===----------------------------------------------------------------------===// - -#include "InstrBuilder.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/WithColor.h" -#include "llvm/Support/raw_ostream.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -using namespace llvm; - -static void initializeUsedResources(InstrDesc &ID, - const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI, - ArrayRef ProcResourceMasks) { - const MCSchedModel &SM = STI.getSchedModel(); - - // Populate resources consumed. - using ResourcePlusCycles = std::pair; - std::vector Worklist; - - // Track cycles contributed by resources that are in a "Super" relationship. - // This is required if we want to correctly match the behavior of method - // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set - // of "consumed" processor resources and resource cycles, the logic in - // ExpandProcResource() doesn't update the number of resource cycles - // contributed by a "Super" resource to a group. 
- // We need to take this into account when we find that a processor resource is - // part of a group, and it is also used as the "Super" of other resources. - // This map stores the number of cycles contributed by sub-resources that are - // part of a "Super" resource. The key value is the "Super" resource mask ID. - DenseMap SuperResources; - - for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { - const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; - const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); - uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; - if (PR.BufferSize != -1) - ID.Buffers.push_back(Mask); - CycleSegment RCy(0, PRE->Cycles, false); - Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); - if (PR.SuperIdx) { - uint64_t Super = ProcResourceMasks[PR.SuperIdx]; - SuperResources[Super] += PRE->Cycles; - } - } - - // Sort elements by mask popcount, so that we prioritize resource units over - // resource groups, and smaller groups over larger groups. - llvm::sort(Worklist.begin(), Worklist.end(), - [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { - unsigned popcntA = countPopulation(A.first); - unsigned popcntB = countPopulation(B.first); - if (popcntA < popcntB) - return true; - if (popcntA > popcntB) - return false; - return A.first < B.first; - }); - - uint64_t UsedResourceUnits = 0; - - // Remove cycles contributed by smaller resources. - for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { - ResourcePlusCycles &A = Worklist[I]; - if (!A.second.size()) { - A.second.NumUnits = 0; - A.second.setReserved(); - ID.Resources.emplace_back(A); - continue; - } - - ID.Resources.emplace_back(A); - uint64_t NormalizedMask = A.first; - if (countPopulation(A.first) == 1) { - UsedResourceUnits |= A.first; - } else { - // Remove the leading 1 from the resource group mask. 
- NormalizedMask ^= PowerOf2Floor(NormalizedMask); - } - - for (unsigned J = I + 1; J < E; ++J) { - ResourcePlusCycles &B = Worklist[J]; - if ((NormalizedMask & B.first) == NormalizedMask) { - B.second.CS.Subtract(A.second.size() - SuperResources[A.first]); - if (countPopulation(B.first) > 1) - B.second.NumUnits++; - } - } - } - - // A SchedWrite may specify a number of cycles in which a resource group - // is reserved. For example (on target x86; cpu Haswell): - // - // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { - // let ResourceCycles = [2, 2, 3]; - // } - // - // This means: - // Resource units HWPort0 and HWPort1 are both used for 2cy. - // Resource group HWPort01 is the union of HWPort0 and HWPort1. - // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 - // will not be usable for 2 entire cycles from instruction issue. - // - // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency - // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an - // extra delay on top of the 2 cycles latency. - // During those extra cycles, HWPort01 is not usable by other instructions. - for (ResourcePlusCycles &RPC : ID.Resources) { - if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { - // Remove the leading 1 from the resource group mask. - uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); - if ((Mask & UsedResourceUnits) == Mask) - RPC.second.setReserved(); - } - } - - LLVM_DEBUG({ - for (const std::pair &R : ID.Resources) - dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n'; - for (const uint64_t R : ID.Buffers) - dbgs() << "\t\tBuffer Mask=" << R << '\n'; - }); -} - -static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, - const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI) { - if (MCDesc.isCall()) { - // We cannot estimate how long this call will take. - // Artificially set an arbitrarily high latency (100cy). 
- ID.MaxLatency = 100U; - return; - } - - int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); - // If latency is unknown, then conservatively assume a MaxLatency of 100cy. - ID.MaxLatency = Latency < 0 ? 100U : static_cast(Latency); -} - -Error InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, - unsigned SchedClassID) { - const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); - const MCSchedModel &SM = STI.getSchedModel(); - const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); - - // These are for now the (strong) assumptions made by this algorithm: - // * The number of explicit and implicit register definitions in a MCInst - // matches the number of explicit and implicit definitions according to - // the opcode descriptor (MCInstrDesc). - // * Register definitions take precedence over register uses in the operands - // list. - // * If an opcode specifies an optional definition, then the optional - // definition is always the last operand in the sequence, and it can be - // set to zero (i.e. "no register"). - // - // These assumptions work quite well for most out-of-order in-tree targets - // like x86. This is mainly because the vast majority of instructions is - // expanded to MCInst using a straightforward lowering logic that preserves - // the ordering of the operands. - unsigned NumExplicitDefs = MCDesc.getNumDefs(); - unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); - unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; - unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; - if (MCDesc.hasOptionalDef()) - TotalDefs++; - ID.Writes.resize(TotalDefs); - // Iterate over the operands list, and skip non-register operands. - // The first NumExplictDefs register operands are expected to be register - // definitions. 
- unsigned CurrentDef = 0; - unsigned i = 0; - for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { - const MCOperand &Op = MCI.getOperand(i); - if (!Op.isReg()) - continue; - - WriteDescriptor &Write = ID.Writes[CurrentDef]; - Write.OpIndex = i; - if (CurrentDef < NumWriteLatencyEntries) { - const MCWriteLatencyEntry &WLE = - *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); - // Conservatively default to MaxLatency. - Write.Latency = - WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); - Write.SClassOrWriteResourceID = WLE.WriteResourceID; - } else { - // Assign a default latency for this write. - Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - } - Write.IsOptionalDef = false; - LLVM_DEBUG({ - dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - CurrentDef++; - } - - if (CurrentDef != NumExplicitDefs) { - return make_error( - "error: Expected more register operand definitions.", - inconvertibleErrorCode()); - } - - CurrentDef = 0; - for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { - unsigned Index = NumExplicitDefs + CurrentDef; - WriteDescriptor &Write = ID.Writes[Index]; - Write.OpIndex = ~CurrentDef; - Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; - if (Index < NumWriteLatencyEntries) { - const MCWriteLatencyEntry &WLE = - *STI.getWriteLatencyEntry(&SCDesc, Index); - // Conservatively default to MaxLatency. - Write.Latency = - WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); - Write.SClassOrWriteResourceID = WLE.WriteResourceID; - } else { - // Assign a default latency for this write. 
- Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - } - - Write.IsOptionalDef = false; - assert(Write.RegisterID != 0 && "Expected a valid phys register!"); - LLVM_DEBUG({ - dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex - << ", PhysReg=" << MRI.getName(Write.RegisterID) - << ", Latency=" << Write.Latency - << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; - }); - } - - if (MCDesc.hasOptionalDef()) { - // Always assume that the optional definition is the last operand of the - // MCInst sequence. - const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1); - if (i == MCI.getNumOperands() || !Op.isReg()) - return make_error( - "error: expected a register operand for an optional " - "definition. Instruction has not be correctly analyzed.", - inconvertibleErrorCode()); - - WriteDescriptor &Write = ID.Writes[TotalDefs - 1]; - Write.OpIndex = MCI.getNumOperands() - 1; - // Assign a default latency for this write. - Write.Latency = ID.MaxLatency; - Write.SClassOrWriteResourceID = 0; - Write.IsOptionalDef = true; - } - - return ErrorSuccess(); -} - -Error InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, - unsigned SchedClassID) { - const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); - unsigned NumExplicitDefs = MCDesc.getNumDefs(); - - // Skip explicit definitions. - unsigned i = 0; - for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) { - const MCOperand &Op = MCI.getOperand(i); - if (Op.isReg()) - NumExplicitDefs--; - } - - if (NumExplicitDefs) { - return make_error( - "error: Expected more register operand definitions. 
", - inconvertibleErrorCode()); - } - - unsigned NumExplicitUses = MCI.getNumOperands() - i; - unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); - if (MCDesc.hasOptionalDef()) { - assert(NumExplicitUses); - NumExplicitUses--; - } - unsigned TotalUses = NumExplicitUses + NumImplicitUses; - if (!TotalUses) - return ErrorSuccess(); - - ID.Reads.resize(TotalUses); - for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) { - ReadDescriptor &Read = ID.Reads[CurrentUse]; - Read.OpIndex = i + CurrentUse; - Read.UseIndex = CurrentUse; - Read.SchedClassID = SchedClassID; - LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex - << ", UseIndex=" << Read.UseIndex << '\n'); - } - - for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) { - ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse]; - Read.OpIndex = ~CurrentUse; - Read.UseIndex = NumExplicitUses + CurrentUse; - Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse]; - Read.SchedClassID = SchedClassID; - LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID=" - << MRI.getName(Read.RegisterID) << '\n'); - } - return ErrorSuccess(); -} - -Expected -InstrBuilder::createInstrDescImpl(const MCInst &MCI) { - assert(STI.getSchedModel().hasInstrSchedModel() && - "Itineraries are not yet supported!"); - - // Obtain the instruction descriptor from the opcode. - unsigned short Opcode = MCI.getOpcode(); - const MCInstrDesc &MCDesc = MCII.get(Opcode); - const MCSchedModel &SM = STI.getSchedModel(); - - // Then obtain the scheduling class information from the instruction. - unsigned SchedClassID = MCDesc.getSchedClass(); - unsigned CPUID = SM.getProcessorID(); - - // Try to solve variant scheduling classes. 
- if (SchedClassID) { - while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) - SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); - - if (!SchedClassID) { - return make_error("unable to resolve this variant class.", - inconvertibleErrorCode()); - } - } - - // Check if this instruction is supported. Otherwise, report an error. - const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); - if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { - std::string ToString; - llvm::raw_string_ostream OS(ToString); - WithColor::error() << "found an unsupported instruction in the input" - << " assembly sequence.\n"; - MCIP.printInst(&MCI, OS, "", STI); - OS.flush(); - WithColor::note() << "instruction: " << ToString << '\n'; - return make_error( - "Don't know how to analyze unsupported instructions", - inconvertibleErrorCode()); - } - - // Create a new empty descriptor. - std::unique_ptr ID = llvm::make_unique(); - ID->NumMicroOps = SCDesc.NumMicroOps; - - if (MCDesc.isCall()) { - // We don't correctly model calls. - WithColor::warning() << "found a call in the input assembly sequence.\n"; - WithColor::note() << "call instructions are not correctly modeled. 
" - << "Assume a latency of 100cy.\n"; - } - - if (MCDesc.isReturn()) { - WithColor::warning() << "found a return instruction in the input" - << " assembly sequence.\n"; - WithColor::note() << "program counter updates are ignored.\n"; - } - - ID->MayLoad = MCDesc.mayLoad(); - ID->MayStore = MCDesc.mayStore(); - ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); - - initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); - computeMaxLatency(*ID, MCDesc, SCDesc, STI); - if (auto Err = populateWrites(*ID, MCI, SchedClassID)) - return std::move(Err); - if (auto Err = populateReads(*ID, MCI, SchedClassID)) - return std::move(Err); - - LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); - LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); - - // Now add the new descriptor. - SchedClassID = MCDesc.getSchedClass(); - if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) { - Descriptors[MCI.getOpcode()] = std::move(ID); - return *Descriptors[MCI.getOpcode()]; - } - - VariantDescriptors[&MCI] = std::move(ID); - return *VariantDescriptors[&MCI]; -} - -Expected -InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { - if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) - return *Descriptors[MCI.getOpcode()]; - - if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) - return *VariantDescriptors[&MCI]; - - return createInstrDescImpl(MCI); -} - -Expected> -InstrBuilder::createInstruction(const MCInst &MCI) { - Expected DescOrErr = getOrCreateInstrDesc(MCI); - if (!DescOrErr) - return DescOrErr.takeError(); - const InstrDesc &D = *DescOrErr; - std::unique_ptr NewIS = llvm::make_unique(D); - - // Initialize Reads first. - for (const ReadDescriptor &RD : D.Reads) { - int RegID = -1; - if (!RD.isImplicitRead()) { - // explicit read. - const MCOperand &Op = MCI.getOperand(RD.OpIndex); - // Skip non-register operands. - if (!Op.isReg()) - continue; - RegID = Op.getReg(); - } else { - // Implicit read. 
- RegID = RD.RegisterID; - } - - // Skip invalid register operands. - if (!RegID) - continue; - - // Okay, this is a register operand. Create a ReadState for it. - assert(RegID > 0 && "Invalid register ID found!"); - NewIS->getUses().emplace_back(llvm::make_unique(RD, RegID)); - } - - // Early exit if there are no writes. - if (D.Writes.empty()) - return std::move(NewIS); - - // Track register writes that implicitly clear the upper portion of the - // underlying super-registers using an APInt. - APInt WriteMask(D.Writes.size(), 0); - - // Now query the MCInstrAnalysis object to obtain information about which - // register writes implicitly clear the upper portion of a super-register. - MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); - - // Check if this is a dependency breaking instruction. - if (MCIA.isDependencyBreaking(STI, MCI)) - NewIS->setDependencyBreaking(); - - // Initialize writes. - unsigned WriteIndex = 0; - for (const WriteDescriptor &WD : D.Writes) { - unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID - : MCI.getOperand(WD.OpIndex).getReg(); - // Check if this is a optional definition that references NoReg. - if (WD.IsOptionalDef && !RegID) { - ++WriteIndex; - continue; - } - - assert(RegID && "Expected a valid register ID!"); - NewIS->getDefs().emplace_back(llvm::make_unique( - WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex])); - ++WriteIndex; - } - - return std::move(NewIS); -} -} // namespace mca Index: llvm/trunk/tools/llvm-mca/Instruction.h =================================================================== --- llvm/trunk/tools/llvm-mca/Instruction.h +++ llvm/trunk/tools/llvm-mca/Instruction.h @@ -1,449 +0,0 @@ -//===--------------------- Instruction.h ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines abstractions used by the Pipeline to model register reads, -/// register writes and instructions. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H -#define LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H - -#include "llvm/Support/MathExtras.h" - -#ifndef NDEBUG -#include "llvm/Support/raw_ostream.h" -#endif - -#include -#include -#include - -namespace mca { - -constexpr int UNKNOWN_CYCLES = -512; - -/// A register write descriptor. -struct WriteDescriptor { - // Operand index. The index is negative for implicit writes only. - // For implicit writes, the actual operand index is computed performing - // a bitwise not of the OpIndex. - int OpIndex; - // Write latency. Number of cycles before write-back stage. - unsigned Latency; - // This field is set to a value different than zero only if this - // is an implicit definition. - unsigned RegisterID; - // Instruction itineraries would set this field to the SchedClass ID. - // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry - // element associated to this write. - // When computing read latencies, this value is matched against the - // "ReadAdvance" information. The hardware backend may implement - // dedicated forwarding paths to quickly propagate write results to dependent - // instructions waiting in the reservation station (effectively bypassing the - // write-back stage). - unsigned SClassOrWriteResourceID; - // True only if this is a write obtained from an optional definition. - // Optional definitions are allowed to reference regID zero (i.e. "no - // register"). - bool IsOptionalDef; - - bool isImplicitWrite() const { return OpIndex < 0; }; -}; - -/// A register read descriptor. -struct ReadDescriptor { - // A MCOperand index. This is used by the Dispatch logic to identify register - // reads. 
Implicit reads have negative indices. The actual operand index of an - // implicit read is the bitwise not of field OpIndex. - int OpIndex; - // The actual "UseIdx". This is used to query the ReadAdvance table. Explicit - // uses always come first in the sequence of uses. - unsigned UseIndex; - // This field is only set if this is an implicit read. - unsigned RegisterID; - // Scheduling Class Index. It is used to query the scheduling model for the - // MCSchedClassDesc object. - unsigned SchedClassID; - - bool isImplicitRead() const { return OpIndex < 0; }; -}; - -class ReadState; - -/// Tracks uses of a register definition (e.g. register write). -/// -/// Each implicit/explicit register write is associated with an instance of -/// this class. A WriteState object tracks the dependent users of a -/// register write. It also tracks how many cycles are left before the write -/// back stage. -class WriteState { - const WriteDescriptor &WD; - // On instruction issue, this field is set equal to the write latency. - // Before instruction issue, this field defaults to -512, a special - // value that represents an "unknown" number of cycles. - int CyclesLeft; - - // Actual register defined by this write. This field is only used - // to speedup queries on the register file. - // For implicit writes, this field always matches the value of - // field RegisterID from WD. - unsigned RegisterID; - - // True if this write implicitly clears the upper portion of RegisterID's - // super-registers. - bool ClearsSuperRegs; - - // This field is set if this is a partial register write, and it has a false - // dependency on any previous write of the same register (or a portion of it). - // DependentWrite must be able to complete before this write completes, so - // that we don't break the WAW, and the two writes can be merged together. - const WriteState *DependentWrite; - - // Number of writes that are in a WAW dependency with this write. 
- unsigned NumWriteUsers; - - // A list of dependent reads. Users is a set of dependent - // reads. A dependent read is added to the set only if CyclesLeft - // is "unknown". As soon as CyclesLeft is 'known', each user in the set - // gets notified with the actual CyclesLeft. - - // The 'second' element of a pair is a "ReadAdvance" number of cycles. - std::set> Users; - -public: - WriteState(const WriteDescriptor &Desc, unsigned RegID, - bool clearsSuperRegs = false) - : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), - ClearsSuperRegs(clearsSuperRegs), DependentWrite(nullptr), - NumWriteUsers(0U) {} - WriteState(const WriteState &Other) = delete; - WriteState &operator=(const WriteState &Other) = delete; - - int getCyclesLeft() const { return CyclesLeft; } - unsigned getWriteResourceID() const { return WD.SClassOrWriteResourceID; } - unsigned getRegisterID() const { return RegisterID; } - unsigned getLatency() const { return WD.Latency; } - - void addUser(ReadState *Use, int ReadAdvance); - - unsigned getNumUsers() const { return Users.size() + NumWriteUsers; } - bool clearsSuperRegisters() const { return ClearsSuperRegs; } - - const WriteState *getDependentWrite() const { return DependentWrite; } - void setDependentWrite(WriteState *Other) { - DependentWrite = Other; - ++Other->NumWriteUsers; - } - - // On every cycle, update CyclesLeft and notify dependent users. - void cycleEvent(); - void onInstructionIssued(); - -#ifndef NDEBUG - void dump() const; -#endif -}; - -/// Tracks register operand latency in cycles. -/// -/// A read may be dependent on more than one write. This occurs when some -/// writes only partially update the register associated to this read. -class ReadState { - const ReadDescriptor &RD; - // Physical register identified associated to this read. - unsigned RegisterID; - // Number of writes that contribute to the definition of RegisterID. 
- // In the absence of partial register updates, the number of DependentWrites - // cannot be more than one. - unsigned DependentWrites; - // Number of cycles left before RegisterID can be read. This value depends on - // the latency of all the dependent writes. It defaults to UNKNOWN_CYCLES. - // It gets set to the value of field TotalCycles only when the 'CyclesLeft' of - // every dependent write is known. - int CyclesLeft; - // This field is updated on every writeStartEvent(). When the number of - // dependent writes (i.e. field DependentWrite) is zero, this value is - // propagated to field CyclesLeft. - unsigned TotalCycles; - // This field is set to true only if there are no dependent writes, and - // there are no `CyclesLeft' to wait. - bool IsReady; - -public: - ReadState(const ReadDescriptor &Desc, unsigned RegID) - : RD(Desc), RegisterID(RegID), DependentWrites(0), - CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true) {} - ReadState(const ReadState &Other) = delete; - ReadState &operator=(const ReadState &Other) = delete; - - const ReadDescriptor &getDescriptor() const { return RD; } - unsigned getSchedClass() const { return RD.SchedClassID; } - unsigned getRegisterID() const { return RegisterID; } - - bool isReady() const { return IsReady; } - bool isImplicitRead() const { return RD.isImplicitRead(); } - - void cycleEvent(); - void writeStartEvent(unsigned Cycles); - void setDependentWrites(unsigned Writes) { - DependentWrites = Writes; - IsReady = !Writes; - } -}; - -/// A sequence of cycles. -/// -/// This class can be used as a building block to construct ranges of cycles. -class CycleSegment { - unsigned Begin; // Inclusive. - unsigned End; // Exclusive. - bool Reserved; // Resources associated to this segment must be reserved. 
- -public: - CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false) - : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {} - - bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; } - bool startsAfter(const CycleSegment &CS) const { return End <= CS.Begin; } - bool endsBefore(const CycleSegment &CS) const { return Begin >= CS.End; } - bool overlaps(const CycleSegment &CS) const { - return !startsAfter(CS) && !endsBefore(CS); - } - bool isExecuting() const { return Begin == 0 && End != 0; } - bool isExecuted() const { return End == 0; } - bool operator<(const CycleSegment &Other) const { - return Begin < Other.Begin; - } - CycleSegment &operator--(void) { - if (Begin) - Begin--; - if (End) - End--; - return *this; - } - - bool isValid() const { return Begin <= End; } - unsigned size() const { return End - Begin; }; - void Subtract(unsigned Cycles) { - assert(End >= Cycles); - End -= Cycles; - } - - unsigned begin() const { return Begin; } - unsigned end() const { return End; } - void setEnd(unsigned NewEnd) { End = NewEnd; } - bool isReserved() const { return Reserved; } - void setReserved() { Reserved = true; } -}; - -/// Helper used by class InstrDesc to describe how hardware resources -/// are used. -/// -/// This class describes how many resource units of a specific resource kind -/// (and how many cycles) are "used" by an instruction. -struct ResourceUsage { - CycleSegment CS; - unsigned NumUnits; - ResourceUsage(CycleSegment Cycles, unsigned Units = 1) - : CS(Cycles), NumUnits(Units) {} - unsigned size() const { return CS.size(); } - bool isReserved() const { return CS.isReserved(); } - void setReserved() { CS.setReserved(); } -}; - -/// An instruction descriptor -struct InstrDesc { - std::vector Writes; // Implicit writes are at the end. - std::vector Reads; // Implicit reads are at the end. 
- - // For every resource used by an instruction of this kind, this vector - // reports the number of "consumed cycles". - std::vector> Resources; - - // A list of buffered resources consumed by this instruction. - std::vector Buffers; - unsigned MaxLatency; - // Number of MicroOps for this instruction. - unsigned NumMicroOps; - - bool MayLoad; - bool MayStore; - bool HasSideEffects; - - // A zero latency instruction doesn't consume any scheduler resources. - bool isZeroLatency() const { return !MaxLatency && Resources.empty(); } -}; - -/// An instruction propagated through the simulated instruction pipeline. -/// -/// This class is used to monitor changes to the internal state of instructions -/// that are sent to the various components of the simulated hardware pipeline. -class Instruction { - const InstrDesc &Desc; - - enum InstrStage { - IS_INVALID, // Instruction in an invalid state. - IS_AVAILABLE, // Instruction dispatched but operands are not ready. - IS_READY, // Instruction dispatched and operands ready. - IS_EXECUTING, // Instruction issued. - IS_EXECUTED, // Instruction executed. Values are written back. - IS_RETIRED // Instruction retired. - }; - - // The current instruction stage. - enum InstrStage Stage; - - // This value defaults to the instruction latency. This instruction is - // considered executed when field CyclesLeft goes to zero. - int CyclesLeft; - - // Retire Unit token ID for this instruction. - unsigned RCUTokenID; - - bool IsDepBreaking; - - using UniqueDef = std::unique_ptr; - using UniqueUse = std::unique_ptr; - using VecDefs = std::vector; - using VecUses = std::vector; - - // Output dependencies. - // One entry per each implicit and explicit register definition. - VecDefs Defs; - - // Input dependencies. - // One entry per each implicit and explicit register use. 
- VecUses Uses; - -public: - Instruction(const InstrDesc &D) - : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0), - IsDepBreaking(false) {} - Instruction(const Instruction &Other) = delete; - Instruction &operator=(const Instruction &Other) = delete; - - VecDefs &getDefs() { return Defs; } - const VecDefs &getDefs() const { return Defs; } - VecUses &getUses() { return Uses; } - const VecUses &getUses() const { return Uses; } - const InstrDesc &getDesc() const { return Desc; } - unsigned getRCUTokenID() const { return RCUTokenID; } - int getCyclesLeft() const { return CyclesLeft; } - - bool hasDependentUsers() const { - return std::any_of(Defs.begin(), Defs.end(), [](const UniqueDef &Def) { - return Def->getNumUsers() > 0; - }); - } - - bool isDependencyBreaking() const { return IsDepBreaking; } - void setDependencyBreaking() { IsDepBreaking = true; } - - unsigned getNumUsers() const { - unsigned NumUsers = 0; - for (const UniqueDef &Def : Defs) - NumUsers += Def->getNumUsers(); - return NumUsers; - } - - // Transition to the dispatch stage, and assign a RCUToken to this - // instruction. The RCUToken is used to track the completion of every - // register write performed by this instruction. - void dispatch(unsigned RCUTokenID); - - // Instruction issued. Transition to the IS_EXECUTING state, and update - // all the definitions. - void execute(); - - // Force a transition from the IS_AVAILABLE state to the IS_READY state if - // input operands are all ready. State transitions normally occur at the - // beginning of a new cycle (see method cycleEvent()). However, the scheduler - // may decide to promote instructions from the wait queue to the ready queue - // as the result of another issue event. This method is called every time the - // instruction might have changed in state. 
- void update(); - - bool isDispatched() const { return Stage == IS_AVAILABLE; } - bool isReady() const { return Stage == IS_READY; } - bool isExecuting() const { return Stage == IS_EXECUTING; } - bool isExecuted() const { return Stage == IS_EXECUTED; } - bool isRetired() const { return Stage == IS_RETIRED; } - - void retire() { - assert(isExecuted() && "Instruction is in an invalid state!"); - Stage = IS_RETIRED; - } - - void cycleEvent(); -}; - -/// An InstRef contains both a SourceMgr index and Instruction pair. The index -/// is used as a unique identifier for the instruction. MCA will make use of -/// this index as a key throughout MCA. -class InstRef : public std::pair { -public: - InstRef() : std::pair(0, nullptr) {} - InstRef(unsigned Index, Instruction *I) - : std::pair(Index, I) {} - - unsigned getSourceIndex() const { return first; } - Instruction *getInstruction() { return second; } - const Instruction *getInstruction() const { return second; } - - /// Returns true if this references a valid instruction. - bool isValid() const { return second != nullptr; } - - /// Invalidate this reference. - void invalidate() { second = nullptr; } - -#ifndef NDEBUG - void print(llvm::raw_ostream &OS) const { OS << getSourceIndex(); } -#endif -}; - -#ifndef NDEBUG -inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const InstRef &IR) { - IR.print(OS); - return OS; -} -#endif - -/// A reference to a register write. -/// -/// This class is mainly used by the register file to describe register -/// mappings. It correlates a register write to the source index of the -/// defining instruction. 
-class WriteRef { - std::pair Data; - static const unsigned INVALID_IID; - -public: - WriteRef() : Data(INVALID_IID, nullptr) {} - WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {} - - unsigned getSourceIndex() const { return Data.first; } - const WriteState *getWriteState() const { return Data.second; } - WriteState *getWriteState() { return Data.second; } - void invalidate() { Data = std::make_pair(INVALID_IID, nullptr); } - - bool isValid() const { - return Data.first != INVALID_IID && Data.second != nullptr; - } - bool operator==(const WriteRef &Other) const { return Data == Other.Data; } - -#ifndef NDEBUG - void dump() const; -#endif -}; - -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/Instruction.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Instruction.cpp +++ llvm/trunk/tools/llvm-mca/Instruction.cpp @@ -1,177 +0,0 @@ -//===--------------------- Instruction.cpp ----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines abstractions used by the Pipeline to model register reads, -// register writes and instructions. -// -//===----------------------------------------------------------------------===// - -#include "Instruction.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace mca { - -using namespace llvm; - -void ReadState::writeStartEvent(unsigned Cycles) { - assert(DependentWrites); - assert(CyclesLeft == UNKNOWN_CYCLES); - - // This read may be dependent on more than one write. This typically occurs - // when a definition is the result of multiple writes where at least one - // write does a partial register update. 
- // The HW is forced to do some extra bookkeeping to track of all the - // dependent writes, and implement a merging scheme for the partial writes. - --DependentWrites; - TotalCycles = std::max(TotalCycles, Cycles); - - if (!DependentWrites) { - CyclesLeft = TotalCycles; - IsReady = !CyclesLeft; - } -} - -void WriteState::onInstructionIssued() { - assert(CyclesLeft == UNKNOWN_CYCLES); - // Update the number of cycles left based on the WriteDescriptor info. - CyclesLeft = getLatency(); - - // Now that the time left before write-back is known, notify - // all the users. - for (const std::pair &User : Users) { - ReadState *RS = User.first; - unsigned ReadCycles = std::max(0, CyclesLeft - User.second); - RS->writeStartEvent(ReadCycles); - } -} - -void WriteState::addUser(ReadState *User, int ReadAdvance) { - // If CyclesLeft is different than -1, then we don't need to - // update the list of users. We can just notify the user with - // the actual number of cycles left (which may be zero). - if (CyclesLeft != UNKNOWN_CYCLES) { - unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); - User->writeStartEvent(ReadCycles); - return; - } - - std::pair NewPair(User, ReadAdvance); - Users.insert(NewPair); -} - -void WriteState::cycleEvent() { - // Note: CyclesLeft can be a negative number. It is an error to - // make it an unsigned quantity because users of this write may - // specify a negative ReadAdvance. - if (CyclesLeft != UNKNOWN_CYCLES) - CyclesLeft--; -} - -void ReadState::cycleEvent() { - // Update the total number of cycles. - if (DependentWrites && TotalCycles) { - --TotalCycles; - return; - } - - // Bail out immediately if we don't know how many cycles are left. 
- if (CyclesLeft == UNKNOWN_CYCLES) - return; - - if (CyclesLeft) { - --CyclesLeft; - IsReady = !CyclesLeft; - } -} - -#ifndef NDEBUG -void WriteState::dump() const { - dbgs() << "{ OpIdx=" << WD.OpIndex << ", Lat=" << getLatency() << ", RegID " - << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }"; -} - -void WriteRef::dump() const { - dbgs() << "IID=" << getSourceIndex() << ' '; - if (isValid()) - getWriteState()->dump(); - else - dbgs() << "(null)"; -} -#endif - -void Instruction::dispatch(unsigned RCUToken) { - assert(Stage == IS_INVALID); - Stage = IS_AVAILABLE; - RCUTokenID = RCUToken; - - // Check if input operands are already available. - update(); -} - -void Instruction::execute() { - assert(Stage == IS_READY); - Stage = IS_EXECUTING; - - // Set the cycles left before the write-back stage. - CyclesLeft = Desc.MaxLatency; - - for (UniqueDef &Def : Defs) - Def->onInstructionIssued(); - - // Transition to the "executed" stage if this is a zero-latency instruction. - if (!CyclesLeft) - Stage = IS_EXECUTED; -} - -void Instruction::update() { - assert(isDispatched() && "Unexpected instruction stage found!"); - - if (!llvm::all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); })) - return; - - // A partial register write cannot complete before a dependent write. 
- auto IsDefReady = [&](const UniqueDef &Def) { - if (const WriteState *Write = Def->getDependentWrite()) { - int WriteLatency = Write->getCyclesLeft(); - if (WriteLatency == UNKNOWN_CYCLES) - return false; - return static_cast(WriteLatency) < Desc.MaxLatency; - } - return true; - }; - - if (llvm::all_of(Defs, IsDefReady)) - Stage = IS_READY; -} - -void Instruction::cycleEvent() { - if (isReady()) - return; - - if (isDispatched()) { - for (UniqueUse &Use : Uses) - Use->cycleEvent(); - - update(); - return; - } - - assert(isExecuting() && "Instruction not in-flight?"); - assert(CyclesLeft && "Instruction already executed?"); - for (UniqueDef &Def : Defs) - Def->cycleEvent(); - CyclesLeft--; - if (!CyclesLeft) - Stage = IS_EXECUTED; -} - -const unsigned WriteRef::INVALID_IID = std::numeric_limits::max(); - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/InstructionTables.h =================================================================== --- llvm/trunk/tools/llvm-mca/InstructionTables.h +++ llvm/trunk/tools/llvm-mca/InstructionTables.h @@ -1,42 +0,0 @@ -//===--------------------- InstructionTables.h ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements a custom stage to generate instruction tables. 
-/// See the description of command-line flag -instruction-tables in -/// docs/CommandGuide/lvm-mca.rst -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H -#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H - -#include "InstrBuilder.h" -#include "Scheduler.h" -#include "Stage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -class InstructionTables final : public Stage { - const llvm::MCSchedModel &SM; - InstrBuilder &IB; - llvm::SmallVector, 4> UsedResources; - -public: - InstructionTables(const llvm::MCSchedModel &Model, InstrBuilder &Builder) - : Stage(), SM(Model), IB(Builder) {} - - bool hasWorkToComplete() const override { return false; } - llvm::Error execute(InstRef &IR) override; -}; -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/InstructionTables.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/InstructionTables.cpp +++ llvm/trunk/tools/llvm-mca/InstructionTables.cpp @@ -1,70 +0,0 @@ -//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements the method InstructionTables::execute(). -/// Method execute() prints a theoretical resource pressure distribution based -/// on the information available in the scheduling model, and without running -/// the pipeline. 
-/// -//===----------------------------------------------------------------------===// - -#include "InstructionTables.h" - -namespace mca { - -using namespace llvm; - -Error InstructionTables::execute(InstRef &IR) { - ArrayRef Masks = IB.getProcResourceMasks(); - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - UsedResources.clear(); - - // Identify the resources consumed by this instruction. - for (const std::pair Resource : Desc.Resources) { - // Skip zero-cycle resources (i.e., unused resources). - if (!Resource.second.size()) - continue; - double Cycles = static_cast(Resource.second.size()); - unsigned Index = std::distance( - Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first)); - const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index); - unsigned NumUnits = ProcResource.NumUnits; - if (!ProcResource.SubUnitsIdxBegin) { - // The number of cycles consumed by each unit. - Cycles /= NumUnits; - for (unsigned I = 0, E = NumUnits; I < E; ++I) { - ResourceRef ResourceUnit = std::make_pair(Index, 1U << I); - UsedResources.emplace_back(std::make_pair(ResourceUnit, Cycles)); - } - continue; - } - - // This is a group. Obtain the set of resources contained in this - // group. Some of these resources may implement multiple units. - // Uniformly distribute Cycles across all of the units. - for (unsigned I1 = 0; I1 < NumUnits; ++I1) { - unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1]; - const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx); - // Compute the number of cycles consumed by each resource unit. - double RUCycles = Cycles / (NumUnits * SubUnit.NumUnits); - for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) { - ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2); - UsedResources.emplace_back(std::make_pair(ResourceUnit, RUCycles)); - } - } - } - - // Send a fake instruction issued event to all the views. 
- HWInstructionIssuedEvent Event(IR, UsedResources); - notifyEvent(Event); - return ErrorSuccess(); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/LSUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/LSUnit.h +++ llvm/trunk/tools/llvm-mca/LSUnit.h @@ -1,161 +0,0 @@ -//===------------------------- LSUnit.h --------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A Load/Store unit class that models load/store queues and that implements -/// a simple weak memory consistency model. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_LSUNIT_H -#define LLVM_TOOLS_LLVM_MCA_LSUNIT_H - -#include "HardwareUnit.h" -#include - -namespace mca { - -class InstRef; -struct InstrDesc; - -/// A Load/Store Unit implementing a load and store queues. -/// -/// This class implements a load queue and a store queue to emulate the -/// out-of-order execution of memory operations. -/// Each load (or store) consumes an entry in the load (or store) queue. -/// -/// Rules are: -/// 1) A younger load is allowed to pass an older load only if there are no -/// stores nor barriers in between the two loads. -/// 2) An younger store is not allowed to pass an older store. -/// 3) A younger store is not allowed to pass an older load. -/// 4) A younger load is allowed to pass an older store only if the load does -/// not alias with the store. -/// -/// This class optimistically assumes that loads don't alias store operations. -/// Under this assumption, younger loads are always allowed to pass older -/// stores (this would only affects rule 4). 
-/// Essentially, this LSUnit doesn't attempt to run any sort alias analysis to -/// predict when loads and stores don't alias with eachother. -/// -/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be -/// set to `false` by the constructor of LSUnit. -/// -/// In the case of write-combining memory, rule 2. could be relaxed to allow -/// reordering of non-aliasing store operations. At the moment, this is not -/// allowed. -/// To put it in another way, there is no option to specify a different memory -/// type for memory operations (example: write-through, write-combining, etc.). -/// Also, there is no way to weaken the memory model, and this unit currently -/// doesn't support write-combining behavior. -/// -/// No assumptions are made on the size of the store buffer. -/// As mentioned before, this class doesn't perform alias analysis. -/// Consequently, LSUnit doesn't know how to identify cases where -/// store-to-load forwarding may occur. -/// -/// LSUnit doesn't attempt to predict whether a load or store hits or misses -/// the L1 cache. To be more specific, LSUnit doesn't know anything about -/// the cache hierarchy and memory types. -/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the -/// scheduling model provides an "optimistic" load-to-use latency (which usually -/// matches the load-to-use latency for when there is a hit in the L1D). -/// -/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor -/// memory-barrier like instructions. -/// LSUnit conservatively assumes that an instruction which `mayLoad` and has -/// `unmodeled side effects` behave like a "soft" load-barrier. That means, it -/// serializes loads without forcing a flush of the load queue. -/// Similarly, instructions that both `mayStore` and have `unmodeled side -/// effects` are treated like store barriers. A full memory -/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side -/// effects. 
This is obviously inaccurate, but this is the best that we can do -/// at the moment. -/// -/// Each load/store barrier consumes one entry in the load/store queue. A -/// load/store barrier enforces ordering of loads/stores: -/// - A younger load cannot pass a load barrier. -/// - A younger store cannot pass a store barrier. -/// -/// A younger load has to wait for the memory load barrier to execute. -/// A load/store barrier is "executed" when it becomes the oldest entry in -/// the load/store queue(s). That also means, all the older loads/stores have -/// already been executed. -class LSUnit : public HardwareUnit { - // Load queue size. - // LQ_Size == 0 means that there are infinite slots in the load queue. - unsigned LQ_Size; - - // Store queue size. - // SQ_Size == 0 means that there are infinite slots in the store queue. - unsigned SQ_Size; - - // If true, loads will never alias with stores. This is the default. - bool NoAlias; - - std::set LoadQueue; - std::set StoreQueue; - - void assignLQSlot(unsigned Index); - void assignSQSlot(unsigned Index); - bool isReadyNoAlias(unsigned Index) const; - - // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is - // conservatively treated as a store barrier. It forces older store to be - // executed before newer stores are issued. - std::set StoreBarriers; - - // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is - // conservatively treated as a load barrier. It forces older loads to execute - // before newer loads are issued. 
- std::set LoadBarriers; - - bool isSQEmpty() const { return StoreQueue.empty(); } - bool isLQEmpty() const { return LoadQueue.empty(); } - bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; } - bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; } - -public: - LSUnit(unsigned LQ = 0, unsigned SQ = 0, bool AssumeNoAlias = false) - : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {} - -#ifndef NDEBUG - void dump() const; -#endif - - enum Status { - LSU_AVAILABLE = 0, - LSU_LQUEUE_FULL, - LSU_SQUEUE_FULL - }; - - // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve - // IR. It also returns LSU_AVAILABLE if IR is not a memory operation. - Status isAvailable(const InstRef &IR) const; - - // Allocates load/store queue resources for IR. - // - // This method assumes that a previous call to `isAvailable(IR)` returned - // LSU_AVAILABLE, and that IR is a memory operation. - void dispatch(const InstRef &IR); - - // By default, rules are: - // 1. A store may not pass a previous store. - // 2. A load may not pass a previous store unless flag 'NoAlias' is set. - // 3. A load may pass a previous load. - // 4. A store may not pass a previous load (regardless of flag 'NoAlias'). - // 5. A load has to wait until an older load barrier is fully executed. - // 6. A store has to wait until an older store barrier is fully executed. - virtual bool isReady(const InstRef &IR) const; - void onInstructionExecuted(const InstRef &IR); -}; - -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/LSUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/LSUnit.cpp +++ llvm/trunk/tools/llvm-mca/LSUnit.cpp @@ -1,157 +0,0 @@ -//===----------------------- LSUnit.cpp --------------------------*- C++-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A Load-Store Unit for the llvm-mca tool. -/// -//===----------------------------------------------------------------------===// - -#include "LSUnit.h" -#include "Instruction.h" - -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -#ifndef NDEBUG -void LSUnit::dump() const { - dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n'; - dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n'; - dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n'; - dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n'; -} -#endif - -void LSUnit::assignLQSlot(unsigned Index) { - assert(!isLQFull()); - assert(LoadQueue.count(Index) == 0); - - LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot \n"); - LoadQueue.insert(Index); -} - -void LSUnit::assignSQSlot(unsigned Index) { - assert(!isSQFull()); - assert(StoreQueue.count(Index) == 0); - - LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot \n"); - StoreQueue.insert(Index); -} - -void LSUnit::dispatch(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - unsigned IsMemBarrier = Desc.HasSideEffects; - assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); - - const unsigned Index = IR.getSourceIndex(); - if (Desc.MayLoad) { - if (IsMemBarrier) - LoadBarriers.insert(Index); - assignLQSlot(Index); - } - - if (Desc.MayStore) { - if (IsMemBarrier) - StoreBarriers.insert(Index); - assignSQSlot(Index); - } -} - -LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (Desc.MayLoad && isLQFull()) - return LSUnit::LSU_LQUEUE_FULL; - if (Desc.MayStore && isSQFull()) - return LSUnit::LSU_SQUEUE_FULL; - return LSUnit::LSU_AVAILABLE; -} - -bool LSUnit::isReady(const InstRef &IR) const { - const InstrDesc &Desc = 
IR.getInstruction()->getDesc(); - const unsigned Index = IR.getSourceIndex(); - bool IsALoad = Desc.MayLoad; - bool IsAStore = Desc.MayStore; - assert((IsALoad || IsAStore) && "Not a memory operation!"); - assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!"); - assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!"); - - if (IsALoad && !LoadBarriers.empty()) { - unsigned LoadBarrierIndex = *LoadBarriers.begin(); - if (Index > LoadBarrierIndex) - return false; - if (Index == LoadBarrierIndex && Index != *LoadQueue.begin()) - return false; - } - - if (IsAStore && !StoreBarriers.empty()) { - unsigned StoreBarrierIndex = *StoreBarriers.begin(); - if (Index > StoreBarrierIndex) - return false; - if (Index == StoreBarrierIndex && Index != *StoreQueue.begin()) - return false; - } - - if (NoAlias && IsALoad) - return true; - - if (StoreQueue.size()) { - // Check if this memory operation is younger than the older store. - if (Index > *StoreQueue.begin()) - return false; - } - - // Okay, we are older than the oldest store in the queue. - // If there are no pending loads, then we can say for sure that this - // instruction is ready. - if (isLQEmpty()) - return true; - - // Check if there are no older loads. - if (Index <= *LoadQueue.begin()) - return true; - - // There is at least one younger load. 
- return !IsAStore; -} - -void LSUnit::onInstructionExecuted(const InstRef &IR) { - const unsigned Index = IR.getSourceIndex(); - std::set::iterator it = LoadQueue.find(Index); - if (it != LoadQueue.end()) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the load queue.\n"); - LoadQueue.erase(it); - } - - it = StoreQueue.find(Index); - if (it != StoreQueue.end()) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the store queue.\n"); - StoreQueue.erase(it); - } - - if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the set of store barriers.\n"); - StoreBarriers.erase(StoreBarriers.begin()); - } - if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) { - LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index - << " has been removed from the set of load barriers.\n"); - LoadBarriers.erase(LoadBarriers.begin()); - } -} -} // namespace mca Index: llvm/trunk/tools/llvm-mca/Pipeline.h =================================================================== --- llvm/trunk/tools/llvm-mca/Pipeline.h +++ llvm/trunk/tools/llvm-mca/Pipeline.h @@ -1,76 +0,0 @@ -//===--------------------- Pipeline.h ---------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements an ordered container of stages that simulate the -/// pipeline of a hardware backend. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINE_H -#define LLVM_TOOLS_LLVM_MCA_PIPELINE_H - -#include "Scheduler.h" -#include "Stage.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Error.h" - -namespace mca { - -class HWEventListener; -class HWInstructionEvent; -class HWStallEvent; - -/// A pipeline for a specific subtarget. -/// -/// It emulates an out-of-order execution of instructions. Instructions are -/// fetched from a MCInst sequence managed by an initial 'Fetch' stage. -/// Instructions are firstly fetched, then dispatched to the schedulers, and -/// then executed. -/// -/// This class tracks the lifetime of an instruction from the moment where -/// it gets dispatched to the schedulers, to the moment where it finishes -/// executing and register writes are architecturally committed. -/// In particular, it monitors changes in the state of every instruction -/// in flight. -/// -/// Instructions are executed in a loop of iterations. The number of iterations -/// is defined by the SourceMgr object, which is managed by the initial stage -/// of the instruction pipeline. -/// -/// The Pipeline entry point is method 'run()' which executes cycles in a loop -/// until there are new instructions to dispatch, and not every instruction -/// has been retired. -/// -/// Internally, the Pipeline collects statistical information in the form of -/// histograms. For example, it tracks how the dispatch group size changes -/// over time. -class Pipeline { - Pipeline(const Pipeline &P) = delete; - Pipeline &operator=(const Pipeline &P) = delete; - - /// An ordered list of stages that define this instruction pipeline. 
- llvm::SmallVector, 8> Stages; - std::set Listeners; - unsigned Cycles; - - llvm::Error runCycle(); - bool hasWorkToProcess(); - void notifyCycleBegin(); - void notifyCycleEnd(); - -public: - Pipeline() : Cycles(0) {} - void appendStage(std::unique_ptr S); - llvm::Error run(); - void addEventListener(HWEventListener *Listener); -}; -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_PIPELINE_H Index: llvm/trunk/tools/llvm-mca/Pipeline.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Pipeline.cpp +++ llvm/trunk/tools/llvm-mca/Pipeline.cpp @@ -1,97 +0,0 @@ -//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements an ordered container of stages that simulate the -/// pipeline of a hardware backend. 
-/// -//===----------------------------------------------------------------------===// - -#include "Pipeline.h" -#include "HWEventListener.h" -#include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Support/Debug.h" - -namespace mca { - -#define DEBUG_TYPE "llvm-mca" - -using namespace llvm; - -void Pipeline::addEventListener(HWEventListener *Listener) { - if (Listener) - Listeners.insert(Listener); - for (auto &S : Stages) - S->addListener(Listener); -} - -bool Pipeline::hasWorkToProcess() { - return llvm::any_of(Stages, [](const std::unique_ptr &S) { - return S->hasWorkToComplete(); - }); -} - -llvm::Error Pipeline::run() { - assert(!Stages.empty() && "Unexpected empty pipeline found!"); - - while (hasWorkToProcess()) { - notifyCycleBegin(); - if (llvm::Error Err = runCycle()) - return Err; - notifyCycleEnd(); - ++Cycles; - } - return llvm::ErrorSuccess(); -} - -llvm::Error Pipeline::runCycle() { - llvm::Error Err = llvm::ErrorSuccess(); - // Update stages before we start processing new instructions. - for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { - const std::unique_ptr &S = *I; - Err = S->cycleStart(); - } - - // Now fetch and execute new instructions. - InstRef IR; - Stage &FirstStage = *Stages[0]; - while (!Err && FirstStage.isAvailable(IR)) - Err = FirstStage.execute(IR); - - // Update stages in preparation for a new cycle. 
- for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { - const std::unique_ptr &S = *I; - Err = S->cycleEnd(); - } - - return Err; -} - -void Pipeline::appendStage(std::unique_ptr S) { - assert(S && "Invalid null stage in input!"); - if (!Stages.empty()) { - Stage *Last = Stages.back().get(); - Last->setNextInSequence(S.get()); - } - - Stages.push_back(std::move(S)); -} - -void Pipeline::notifyCycleBegin() { - LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); - for (HWEventListener *Listener : Listeners) - Listener->onCycleBegin(); -} - -void Pipeline::notifyCycleEnd() { - LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n"); - for (HWEventListener *Listener : Listeners) - Listener->onCycleEnd(); -} -} // namespace mca. Index: llvm/trunk/tools/llvm-mca/RegisterFile.h =================================================================== --- llvm/trunk/tools/llvm-mca/RegisterFile.h +++ llvm/trunk/tools/llvm-mca/RegisterFile.h @@ -1,171 +0,0 @@ -//===--------------------- RegisterFile.h -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a register mapping file class. This class is responsible -/// for managing hardware register files and the tracking of data dependencies -/// between registers. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H -#define LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H - -#include "HardwareUnit.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSchedule.h" -#include "llvm/Support/Error.h" - -namespace mca { - -class ReadState; -class WriteState; -class WriteRef; - -/// Manages hardware register files, and tracks register definitions for -/// register renaming purposes. -class RegisterFile : public HardwareUnit { - const llvm::MCRegisterInfo &MRI; - - // Each register file is associated with an instance of - // RegisterMappingTracker. - // A RegisterMappingTracker keeps track of the number of physical registers - // which have been dynamically allocated by the simulator. - struct RegisterMappingTracker { - // The total number of physical registers that are available in this - // register file for register renaming purpouses. A value of zero for this - // field means: this register file has an unbounded number of physical - // registers. - const unsigned NumPhysRegs; - // Number of physical registers that are currently in use. - unsigned NumUsedPhysRegs; - - RegisterMappingTracker(unsigned NumPhysRegisters) - : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0) {} - }; - - // A vector of register file descriptors. This set always contains at least - // one entry. Entry at index #0 is reserved. That entry describes a register - // file with an unbounded number of physical registers that "sees" all the - // hardware registers declared by the target (i.e. all the register - // definitions in the target specific `XYZRegisterInfo.td` - where `XYZ` is - // the target name). - // - // Users can limit the number of physical registers that are available in - // regsiter file #0 specifying command line flag `-register-file-size=`. 
- llvm::SmallVector RegisterFiles; - - // This type is used to propagate information about the owner of a register, - // and the cost of allocating it in the PRF. Register cost is defined as the - // number of physical registers consumed by the PRF to allocate a user - // register. - // - // For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical - // registers. So, the cost of allocating a YMM register in BtVer2 is 2. - using IndexPlusCostPairTy = std::pair; - - // Struct RegisterRenamingInfo maps registers to register files. - // There is a RegisterRenamingInfo object for every register defined by - // the target. RegisteRenamingInfo objects are stored into vector - // RegisterMappings, and register IDs can be used to reference them. - struct RegisterRenamingInfo { - IndexPlusCostPairTy IndexPlusCost; - llvm::MCPhysReg RenameAs; - }; - - // RegisterMapping objects are mainly used to track physical register - // definitions. There is a RegisterMapping for every register defined by the - // Target. For each register, a RegisterMapping pair contains a descriptor of - // the last register write (in the form of a WriteRef object), as well as a - // RegisterRenamingInfo to quickly identify owning register files. - // - // This implementation does not allow overlapping register files. The only - // register file that is allowed to overlap with other register files is - // register file #0. If we exclude register #0, every register is "owned" by - // at most one register file. - using RegisterMapping = std::pair; - - // This map contains one entry for each register defined by the target. - std::vector RegisterMappings; - - // This method creates a new register file descriptor. - // The new register file owns all of the registers declared by register - // classes in the 'RegisterClasses' set. - // - // Processor models allow the definition of RegisterFile(s) via tablegen. 
For - // example, this is a tablegen definition for a x86 register file for - // XMM[0-15] and YMM[0-15], that allows up to 60 renames (each rename costs 1 - // physical register). - // - // def FPRegisterFile : RegisterFile<60, [VR128RegClass, VR256RegClass]> - // - // Here FPRegisterFile contains all the registers defined by register class - // VR128RegClass and VR256RegClass. FPRegisterFile implements 60 - // registers which can be used for register renaming purpose. - void - addRegisterFile(llvm::ArrayRef RegisterClasses, - unsigned NumPhysRegs); - - // Consumes physical registers in each register file specified by the - // `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`. - void allocatePhysRegs(const RegisterRenamingInfo &Entry, - llvm::MutableArrayRef UsedPhysRegs); - - // Releases previously allocated physical registers from the register file(s). - // This method is called from `invalidateRegisterMapping()`. - void freePhysRegs(const RegisterRenamingInfo &Entry, - llvm::MutableArrayRef FreedPhysRegs); - - // Create an instance of RegisterMappingTracker for every register file - // specified by the processor model. - // If no register file is specified, then this method creates a default - // register file with an unbounded number of physical registers. - void initialize(const llvm::MCSchedModel &SM, unsigned NumRegs); - -public: - RegisterFile(const llvm::MCSchedModel &SM, const llvm::MCRegisterInfo &mri, - unsigned NumRegs = 0); - - // This method updates the register mappings inserting a new register - // definition. This method is also responsible for updating the number of - // allocated physical registers in each register file modified by the write. - // No physical regiser is allocated when flag ShouldAllocatePhysRegs is set. - void addRegisterWrite(WriteRef Write, - llvm::MutableArrayRef UsedPhysRegs, - bool ShouldAllocatePhysRegs = true); - - // Removes write \param WS from the register mappings. 
- // Physical registers may be released to reflect this update. - void removeRegisterWrite(const WriteState &WS, - llvm::MutableArrayRef FreedPhysRegs, - bool ShouldFreePhysRegs = true); - - // Checks if there are enough physical registers in the register files. - // Returns a "response mask" where each bit represents the response from a - // different register file. A mask of all zeroes means that all register - // files are available. Otherwise, the mask can be used to identify which - // register file was busy. This sematic allows us to classify dispatch - // stalls caused by the lack of register file resources. - // - // Current implementation can simulate up to 32 register files (including the - // special register file at index #0). - unsigned isAvailable(llvm::ArrayRef Regs) const; - void collectWrites(llvm::SmallVectorImpl &Writes, - unsigned RegID) const; - unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } - -#ifndef NDEBUG - void dump() const; -#endif -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H Index: llvm/trunk/tools/llvm-mca/RegisterFile.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/RegisterFile.cpp +++ llvm/trunk/tools/llvm-mca/RegisterFile.cpp @@ -1,350 +0,0 @@ -//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a register mapping file class. This class is responsible -/// for managing hardware register files and the tracking of data dependencies -/// between registers. 
-/// -//===----------------------------------------------------------------------===// - -#include "RegisterFile.h" -#include "Instruction.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -RegisterFile::RegisterFile(const llvm::MCSchedModel &SM, - const llvm::MCRegisterInfo &mri, unsigned NumRegs) - : MRI(mri), RegisterMappings(mri.getNumRegs(), - {WriteRef(), {IndexPlusCostPairTy(0, 1), 0}}) { - initialize(SM, NumRegs); -} - -void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { - // Create a default register file that "sees" all the machine registers - // declared by the target. The number of physical registers in the default - // register file is set equal to `NumRegs`. A value of zero for `NumRegs` - // means: this register file has an unbounded number of physical registers. - addRegisterFile({} /* all registers */, NumRegs); - if (!SM.hasExtraProcessorInfo()) - return; - - // For each user defined register file, allocate a RegisterMappingTracker - // object. The size of every register file, as well as the mapping between - // register files and register classes is specified via tablegen. - const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); - for (unsigned I = 0, E = Info.NumRegisterFiles; I < E; ++I) { - const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; - // Skip invalid register files with zero physical registers. - unsigned Length = RF.NumRegisterCostEntries; - if (!RF.NumPhysRegs) - continue; - // The cost of a register definition is equivalent to the number of - // physical registers that are allocated at register renaming stage. - const MCRegisterCostEntry *FirstElt = - &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; - addRegisterFile(ArrayRef(FirstElt, Length), - RF.NumPhysRegs); - } -} - -void RegisterFile::addRegisterFile(ArrayRef Entries, - unsigned NumPhysRegs) { - // A default register file is always allocated at index #0. 
That register file - // is mainly used to count the total number of mappings created by all - // register files at runtime. Users can limit the number of available physical - // registers in register file #0 through the command line flag - // `-register-file-size`. - unsigned RegisterFileIndex = RegisterFiles.size(); - RegisterFiles.emplace_back(NumPhysRegs); - - // Special case where there is no register class identifier in the set. - // An empty set of register classes means: this register file contains all - // the physical registers specified by the target. - // We optimistically assume that a register can be renamed at the cost of a - // single physical register. The constructor of RegisterFile ensures that - // a RegisterMapping exists for each logical register defined by the Target. - if (Entries.empty()) - return; - - // Now update the cost of individual registers. - for (const MCRegisterCostEntry &RCE : Entries) { - const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); - for (const MCPhysReg Reg : RC) { - RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; - IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; - if (IPC.first && IPC.first != RegisterFileIndex) { - // The only register file that is allowed to overlap is the default - // register file at index #0. The analysis is inaccurate if register - // files overlap. - errs() << "warning: register " << MRI.getName(Reg) - << " defined in multiple register files."; - } - IPC = std::make_pair(RegisterFileIndex, RCE.Cost); - Entry.RenameAs = Reg; - - // Assume the same cost for each sub-register. 
- for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { - RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; - if (!OtherEntry.IndexPlusCost.first && - (!OtherEntry.RenameAs || - MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { - OtherEntry.IndexPlusCost = IPC; - OtherEntry.RenameAs = Reg; - } - } - } - } -} - -void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef UsedPhysRegs) { - unsigned RegisterFileIndex = Entry.IndexPlusCost.first; - unsigned Cost = Entry.IndexPlusCost.second; - if (RegisterFileIndex) { - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - RMT.NumUsedPhysRegs += Cost; - UsedPhysRegs[RegisterFileIndex] += Cost; - } - - // Now update the default register mapping tracker. - RegisterFiles[0].NumUsedPhysRegs += Cost; - UsedPhysRegs[0] += Cost; -} - -void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, - MutableArrayRef FreedPhysRegs) { - unsigned RegisterFileIndex = Entry.IndexPlusCost.first; - unsigned Cost = Entry.IndexPlusCost.second; - if (RegisterFileIndex) { - RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; - RMT.NumUsedPhysRegs -= Cost; - FreedPhysRegs[RegisterFileIndex] += Cost; - } - - // Now update the default register mapping tracker. - RegisterFiles[0].NumUsedPhysRegs -= Cost; - FreedPhysRegs[0] += Cost; -} - -void RegisterFile::addRegisterWrite(WriteRef Write, - MutableArrayRef UsedPhysRegs, - bool ShouldAllocatePhysRegs) { - WriteState &WS = *Write.getWriteState(); - unsigned RegID = WS.getRegisterID(); - assert(RegID && "Adding an invalid register definition?"); - - LLVM_DEBUG({ - dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() - << ", " << MRI.getName(RegID) << "]\n"; - }); - - // If RenameAs is equal to RegID, then RegID is subject to register renaming - // and false dependencies on RegID are all eliminated. 
- - // If RenameAs references the invalid register, then we optimistically assume - // that it can be renamed. In the absence of tablegen descriptors for register - // files, RenameAs is always set to the invalid register ID. In all other - // cases, RenameAs must be either equal to RegID, or it must reference a - // super-register of RegID. - - // If RenameAs is a super-register of RegID, then a write to RegID has always - // a false dependency on RenameAs. The only exception is for when the write - // implicitly clears the upper portion of the underlying register. - // If a write clears its super-registers, then it is renamed as `RenameAs`. - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - if (RRI.RenameAs && RRI.RenameAs != RegID) { - RegID = RRI.RenameAs; - WriteRef &OtherWrite = RegisterMappings[RegID].first; - - if (!WS.clearsSuperRegisters()) { - // The processor keeps the definition of `RegID` together with register - // `RenameAs`. Since this partial write is not renamed, no physical - // register is allocated. - ShouldAllocatePhysRegs = false; - - if (OtherWrite.getWriteState() && - (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { - // This partial write has a false dependency on RenameAs. - WS.setDependentWrite(OtherWrite.getWriteState()); - } - } - } - - // Update the mapping for register RegID including its sub-registers. - RegisterMappings[RegID].first = Write; - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) - RegisterMappings[*I].first = Write; - - // No physical registers are allocated for instructions that are optimized in - // hardware. For example, zero-latency data-dependency breaking instructions - // don't consume physical registers. 
- if (ShouldAllocatePhysRegs) - allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); - - if (!WS.clearsSuperRegisters()) - return; - - for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) - RegisterMappings[*I].first = Write; -} - -void RegisterFile::removeRegisterWrite(const WriteState &WS, - MutableArrayRef FreedPhysRegs, - bool ShouldFreePhysRegs) { - unsigned RegID = WS.getRegisterID(); - - assert(RegID != 0 && "Invalidating an already invalid register?"); - assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && - "Invalidating a write of unknown cycles!"); - assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); - - unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; - if (RenameAs && RenameAs != RegID) { - RegID = RenameAs; - - if (!WS.clearsSuperRegisters()) { - // Keep the definition of `RegID` together with register `RenameAs`. - ShouldFreePhysRegs = false; - } - } - - if (ShouldFreePhysRegs) - freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); - - WriteRef &WR = RegisterMappings[RegID].first; - if (WR.getWriteState() == &WS) - WR.invalidate(); - - for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WriteRef &OtherWR = RegisterMappings[*I].first; - if (OtherWR.getWriteState() == &WS) - OtherWR.invalidate(); - } - - if (!WS.clearsSuperRegisters()) - return; - - for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { - WriteRef &OtherWR = RegisterMappings[*I].first; - if (OtherWR.getWriteState() == &WS) - OtherWR.invalidate(); - } -} - -void RegisterFile::collectWrites(SmallVectorImpl &Writes, - unsigned RegID) const { - assert(RegID && RegID < RegisterMappings.size()); - LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " - << MRI.getName(RegID) << '\n'); - const WriteRef &WR = RegisterMappings[RegID].first; - if (WR.isValid()) - Writes.push_back(WR); - - // Handle potential partial register updates. 
- for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { - const WriteRef &WR = RegisterMappings[*I].first; - if (WR.isValid()) - Writes.push_back(WR); - } - - // Remove duplicate entries and resize the input vector. - llvm::sort(Writes.begin(), Writes.end(), - [](const WriteRef &Lhs, const WriteRef &Rhs) { - return Lhs.getWriteState() < Rhs.getWriteState(); - }); - auto It = std::unique(Writes.begin(), Writes.end()); - Writes.resize(std::distance(Writes.begin(), It)); - - LLVM_DEBUG({ - for (const WriteRef &WR : Writes) { - const WriteState &WS = *WR.getWriteState(); - dbgs() << "[PRF] Found a dependent use of Register " - << MRI.getName(WS.getRegisterID()) << " (defined by intruction #" - << WR.getSourceIndex() << ")\n"; - } - }); -} - -unsigned RegisterFile::isAvailable(ArrayRef Regs) const { - SmallVector NumPhysRegs(getNumRegisterFiles()); - - // Find how many new mappings must be created for each register file. - for (const unsigned RegID : Regs) { - const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; - const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; - if (Entry.first) - NumPhysRegs[Entry.first] += Entry.second; - NumPhysRegs[0] += Entry.second; - } - - unsigned Response = 0; - for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { - unsigned NumRegs = NumPhysRegs[I]; - if (!NumRegs) - continue; - - const RegisterMappingTracker &RMT = RegisterFiles[I]; - if (!RMT.NumPhysRegs) { - // The register file has an unbounded number of microarchitectural - // registers. - continue; - } - - if (RMT.NumPhysRegs < NumRegs) { - // The current register file is too small. This may occur if the number of - // microarchitectural registers in register file #0 was changed by the - // users via flag -reg-file-size. Alternatively, the scheduling model - // specified a too small number of registers for this register file. 
- LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n"); - - // FIXME: Normalize the instruction register count to match the - // NumPhysRegs value. This is a highly unusual case, and is not expected - // to occur. This normalization is hiding an inconsistency in either the - // scheduling model or in the value that the user might have specified - // for NumPhysRegs. - NumRegs = RMT.NumPhysRegs; - } - - if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs)) - Response |= (1U << I); - } - - return Response; -} - -#ifndef NDEBUG -void RegisterFile::dump() const { - for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { - const RegisterMapping &RM = RegisterMappings[I]; - if (!RM.first.getWriteState()) - continue; - const RegisterRenamingInfo &RRI = RM.second; - dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << RRI.IndexPlusCost.first - << ", Cost=" << RRI.IndexPlusCost.second - << ", RenameAs=" << RRI.RenameAs << ", "; - RM.first.dump(); - dbgs() << '\n'; - } - - for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { - dbgs() << "Register File #" << I; - const RegisterMappingTracker &RMT = RegisterFiles[I]; - dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs - << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n'; - } -} -#endif - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/ResourceManager.h =================================================================== --- llvm/trunk/tools/llvm-mca/ResourceManager.h +++ llvm/trunk/tools/llvm-mca/ResourceManager.h @@ -1,360 +0,0 @@ -//===--------------------- ResourceManager.h --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// The classes here represent processor resource units and their management -/// strategy. 
These classes are managed by the Scheduler. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H -#define LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H - -#include "Instruction.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -/// Used to notify the internal state of a processor resource. -/// -/// A processor resource is available if it is not reserved, and there are -/// available slots in the buffer. A processor resource is unavailable if it -/// is either reserved, or the associated buffer is full. A processor resource -/// with a buffer size of -1 is always available if it is not reserved. -/// -/// Values of type ResourceStateEvent are returned by method -/// ResourceState::isBufferAvailable(), which is used to query the internal -/// state of a resource. -/// -/// The naming convention for resource state events is: -/// * Event names start with prefix RS_ -/// * Prefix RS_ is followed by a string describing the actual resource state. -enum ResourceStateEvent { - RS_BUFFER_AVAILABLE, - RS_BUFFER_UNAVAILABLE, - RS_RESERVED -}; - -/// Resource allocation strategy used by hardware scheduler resources. -class ResourceStrategy { - ResourceStrategy(const ResourceStrategy &) = delete; - ResourceStrategy &operator=(const ResourceStrategy &) = delete; - -public: - ResourceStrategy() {} - virtual ~ResourceStrategy(); - - /// Selects a processor resource unit from a ReadyMask. - virtual uint64_t select(uint64_t ReadyMask) = 0; - - /// Called by the ResourceManager when a processor resource group, or a - /// processor resource with multiple units has become unavailable. - /// - /// The default strategy uses this information to bias its selection logic. 
- virtual void used(uint64_t ResourceMask) {} -}; - -/// Default resource allocation strategy used by processor resource groups and -/// processor resources with multiple units. -class DefaultResourceStrategy final : public ResourceStrategy { - /// A Mask of resource unit identifiers. - /// - /// There is one bit set for every available resource unit. - /// It defaults to the value of field ResourceSizeMask in ResourceState. - const unsigned ResourceUnitMask; - - /// A simple round-robin selector for processor resource units. - /// Each bit of this mask identifies a sub resource within a group. - /// - /// As an example, lets assume that this is a default policy for a - /// processor resource group composed by the following three units: - /// ResourceA -- 0b001 - /// ResourceB -- 0b010 - /// ResourceC -- 0b100 - /// - /// Field NextInSequenceMask is used to select the next unit from the set of - /// resource units. It defaults to the value of field `ResourceUnitMasks` (in - /// this example, it defaults to mask '0b111'). - /// - /// The round-robin selector would firstly select 'ResourceC', then - /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the - /// corresponding bit in NextInSequenceMask is cleared. For example, if - /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes - /// 0xb011. - /// - /// When NextInSequenceMask becomes zero, it is automatically reset to the - /// default value (i.e. ResourceUnitMask). - uint64_t NextInSequenceMask; - - /// This field is used to track resource units that are used (i.e. selected) - /// by other groups other than the one associated with this strategy object. - /// - /// In LLVM processor resource groups are allowed to partially (or fully) - /// overlap. That means, a same unit may be visible to multiple groups. - /// This field keeps track of uses that have originated from outside of - /// this group. 
The idea is to bias the selection strategy, so that resources - /// that haven't been used by other groups get prioritized. - /// - /// The end goal is to (try to) keep the resource distribution as much uniform - /// as possible. By construction, this mask only tracks one-level of resource - /// usage. Therefore, this strategy is expected to be less accurate when same - /// units are used multiple times by other groups within a single round of - /// select. - /// - /// Note: an LRU selector would have a better accuracy at the cost of being - /// slightly more expensive (mostly in terms of runtime cost). Methods - /// 'select' and 'used', are always in the hot execution path of llvm-mca. - /// Therefore, a slow implementation of 'select' would have a negative impact - /// on the overall performance of the tool. - uint64_t RemovedFromNextInSequence; - - void skipMask(uint64_t Mask); - -public: - DefaultResourceStrategy(uint64_t UnitMask) - : ResourceStrategy(), ResourceUnitMask(UnitMask), - NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {} - virtual ~DefaultResourceStrategy() = default; - - uint64_t select(uint64_t ReadyMask) override; - void used(uint64_t Mask) override; -}; - -/// A processor resource descriptor. -/// -/// There is an instance of this class for every processor resource defined by -/// the machine scheduling model. -/// Objects of class ResourceState dynamically track the usage of processor -/// resource units. -class ResourceState { - /// An index to the MCProcResourceDesc entry in the processor model. - const unsigned ProcResourceDescIndex; - /// A resource mask. This is generated by the tool with the help of - /// function `mca::createProcResourceMasks' (see Support.h). - const uint64_t ResourceMask; - - /// A ProcResource can have multiple units. - /// - /// For processor resource groups, - /// this field default to the value of field `ResourceMask`; the number of - /// bits set is equal to the cardinality of the group. 
For normal (i.e. - /// non-group) resources, the number of bits set in this mask is equivalent - /// to the number of units declared by the processor model (see field - /// 'NumUnits' in 'ProcResourceUnits'). - uint64_t ResourceSizeMask; - - /// A mask of ready units. - uint64_t ReadyMask; - - /// Buffered resources will have this field set to a positive number different - /// than zero. A buffered resource behaves like a reservation station - /// implementing its own buffer for out-of-order execution. - /// - /// A BufferSize of 1 is used by scheduler resources that force in-order - /// execution. - /// - /// A BufferSize of 0 is used to model in-order issue/dispatch resources. - /// Since in-order issue/dispatch resources don't implement buffers, dispatch - /// events coincide with issue events. - /// Also, no other instruction ca be dispatched/issue while this resource is - /// in use. Only when all the "resource cycles" are consumed (after the issue - /// event), a new instruction ca be dispatched. - const int BufferSize; - - /// Available slots in the buffer (zero, if this is not a buffered resource). - unsigned AvailableSlots; - - /// This field is set if this resource is currently reserved. - /// - /// Resources can be reserved for a number of cycles. - /// Instructions can still be dispatched to reserved resources. However, - /// istructions dispatched to a reserved resource cannot be issued to the - /// underlying units (i.e. pipelines) until the resource is released. - bool Unavailable; - - /// Checks for the availability of unit 'SubResMask' in the group. 
- bool isSubResourceReady(uint64_t SubResMask) const { - return ReadyMask & SubResMask; - } - -public: - ResourceState(const llvm::MCProcResourceDesc &Desc, unsigned Index, - uint64_t Mask); - - unsigned getProcResourceID() const { return ProcResourceDescIndex; } - uint64_t getResourceMask() const { return ResourceMask; } - uint64_t getReadyMask() const { return ReadyMask; } - int getBufferSize() const { return BufferSize; } - - bool isBuffered() const { return BufferSize > 0; } - bool isInOrder() const { return BufferSize == 1; } - - /// Returns true if this is an in-order dispatch/issue resource. - bool isADispatchHazard() const { return BufferSize == 0; } - bool isReserved() const { return Unavailable; } - - void setReserved() { Unavailable = true; } - void clearReserved() { Unavailable = false; } - - /// Returs true if this resource is not reserved, and if there are at least - /// `NumUnits` available units. - bool isReady(unsigned NumUnits = 1) const; - - bool isAResourceGroup() const { - return llvm::countPopulation(ResourceMask) > 1; - } - - bool containsResource(uint64_t ID) const { return ResourceMask & ID; } - - void markSubResourceAsUsed(uint64_t ID) { - assert(isSubResourceReady(ID)); - ReadyMask ^= ID; - } - - void releaseSubResource(uint64_t ID) { - assert(!isSubResourceReady(ID)); - ReadyMask ^= ID; - } - - unsigned getNumUnits() const { - return isAResourceGroup() ? 1U : llvm::countPopulation(ResourceSizeMask); - } - - /// Checks if there is an available slot in the resource buffer. - /// - /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if - /// there is a slot available. - /// - /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it - /// is reserved. - /// - /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. - ResourceStateEvent isBufferAvailable() const; - - /// Reserve a slot in the buffer. 
- void reserveBuffer() { - if (AvailableSlots) - AvailableSlots--; - } - - /// Release a slot in the buffer. - void releaseBuffer() { - if (BufferSize > 0) - AvailableSlots++; - assert(AvailableSlots <= static_cast(BufferSize)); - } - -#ifndef NDEBUG - void dump() const; -#endif -}; - -/// A resource unit identifier. -/// -/// This is used to identify a specific processor resource unit using a pair -/// of indices where the 'first' index is a processor resource mask, and the -/// 'second' index is an index for a "sub-resource" (i.e. unit). -typedef std::pair ResourceRef; - -// First: a MCProcResourceDesc index identifying a buffered resource. -// Second: max number of buffer entries used in this resource. -typedef std::pair BufferUsageEntry; - -/// A resource manager for processor resource units and groups. -/// -/// This class owns all the ResourceState objects, and it is responsible for -/// acting on requests from a Scheduler by updating the internal state of -/// ResourceState objects. -/// This class doesn't know about instruction itineraries and functional units. -/// In future, it can be extended to support itineraries too through the same -/// public interface. -class ResourceManager { - // The resource manager owns all the ResourceState. - std::vector> Resources; - std::vector> Strategies; - - // Keeps track of which resources are busy, and how many cycles are left - // before those become usable again. - llvm::SmallDenseMap BusyResources; - - // A table to map processor resource IDs to processor resource masks. - llvm::SmallVector ProcResID2Mask; - - // Returns the actual resource unit that will be used. - ResourceRef selectPipe(uint64_t ResourceID); - - void use(const ResourceRef &RR); - void release(const ResourceRef &RR); - - unsigned getNumUnits(uint64_t ResourceID) const; - - // Overrides the selection strategy for the processor resource with the given - // mask. 
- void setCustomStrategyImpl(std::unique_ptr S, - uint64_t ResourceMask); - -public: - ResourceManager(const llvm::MCSchedModel &SM); - virtual ~ResourceManager() = default; - - // Overrides the selection strategy for the resource at index ResourceID in - // the MCProcResourceDesc table. - void setCustomStrategy(std::unique_ptr S, - unsigned ResourceID) { - assert(ResourceID < ProcResID2Mask.size() && - "Invalid resource index in input!"); - return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]); - } - - // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if - // there are enough available slots in the buffers. - ResourceStateEvent canBeDispatched(llvm::ArrayRef Buffers) const; - - // Return the processor resource identifier associated to this Mask. - unsigned resolveResourceMask(uint64_t Mask) const; - - // Consume a slot in every buffered resource from array 'Buffers'. Resource - // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. - void reserveBuffers(llvm::ArrayRef Buffers); - - // Release buffer entries previously allocated by method reserveBuffers. - void releaseBuffers(llvm::ArrayRef Buffers); - - // Reserve a processor resource. A reserved resource is not available for - // instruction issue until it is released. - void reserveResource(uint64_t ResourceID); - - // Release a previously reserved processor resource. - void releaseResource(uint64_t ResourceID); - - // Returns true if all resources are in-order, and there is at least one - // resource which is a dispatch hazard (BufferSize = 0). 
- bool mustIssueImmediately(const InstrDesc &Desc) const; - - bool canBeIssued(const InstrDesc &Desc) const; - - void issueInstruction( - const InstrDesc &Desc, - llvm::SmallVectorImpl> &Pipes); - - void cycleEvent(llvm::SmallVectorImpl &ResourcesFreed); - -#ifndef NDEBUG - void dump() const { - for (const std::unique_ptr &Resource : Resources) - Resource->dump(); - } -#endif -}; -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H Index: llvm/trunk/tools/llvm-mca/ResourceManager.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/ResourceManager.cpp +++ llvm/trunk/tools/llvm-mca/ResourceManager.cpp @@ -1,309 +0,0 @@ -//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// The classes here represent processor resource units and their management -/// strategy. These classes are managed by the Scheduler. -/// -//===----------------------------------------------------------------------===// - -#include "ResourceManager.h" -#include "Support.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace mca { - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" -ResourceStrategy::~ResourceStrategy() = default; - -void DefaultResourceStrategy::skipMask(uint64_t Mask) { - NextInSequenceMask &= (~Mask); - if (!NextInSequenceMask) { - NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; - RemovedFromNextInSequence = 0; - } -} - -uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) { - // This method assumes that ReadyMask cannot be zero. 
- uint64_t CandidateMask = llvm::PowerOf2Floor(NextInSequenceMask); - while (!(ReadyMask & CandidateMask)) { - skipMask(CandidateMask); - CandidateMask = llvm::PowerOf2Floor(NextInSequenceMask); - } - return CandidateMask; -} - -void DefaultResourceStrategy::used(uint64_t Mask) { - if (Mask > NextInSequenceMask) { - RemovedFromNextInSequence |= Mask; - return; - } - skipMask(Mask); -} - -ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index, - uint64_t Mask) - : ProcResourceDescIndex(Index), ResourceMask(Mask), - BufferSize(Desc.BufferSize) { - if (llvm::countPopulation(ResourceMask) > 1) - ResourceSizeMask = ResourceMask ^ llvm::PowerOf2Floor(ResourceMask); - else - ResourceSizeMask = (1ULL << Desc.NumUnits) - 1; - ReadyMask = ResourceSizeMask; - AvailableSlots = BufferSize == -1 ? 0U : static_cast(BufferSize); - Unavailable = false; -} - -bool ResourceState::isReady(unsigned NumUnits) const { - return (!isReserved() || isADispatchHazard()) && - llvm::countPopulation(ReadyMask) >= NumUnits; -} - -ResourceStateEvent ResourceState::isBufferAvailable() const { - if (isADispatchHazard() && isReserved()) - return RS_RESERVED; - if (!isBuffered() || AvailableSlots) - return RS_BUFFER_AVAILABLE; - return RS_BUFFER_UNAVAILABLE; -} - -#ifndef NDEBUG -void ResourceState::dump() const { - dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask - << ", RDYMASK: " << ReadyMask << ", BufferSize=" << BufferSize - << ", AvailableSlots=" << AvailableSlots - << ", Reserved=" << Unavailable << '\n'; -} -#endif - -static unsigned getResourceStateIndex(uint64_t Mask) { - return std::numeric_limits::digits - llvm::countLeadingZeros(Mask); -} - -static std::unique_ptr -getStrategyFor(const ResourceState &RS) { - if (RS.isAResourceGroup() || RS.getNumUnits() > 1) - return llvm::make_unique(RS.getReadyMask()); - return std::unique_ptr(nullptr); -} - -ResourceManager::ResourceManager(const MCSchedModel &SM) - : 
ProcResID2Mask(SM.getNumProcResourceKinds()) { - computeProcResourceMasks(SM, ProcResID2Mask); - Resources.resize(SM.getNumProcResourceKinds()); - Strategies.resize(SM.getNumProcResourceKinds()); - - for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { - uint64_t Mask = ProcResID2Mask[I]; - unsigned Index = getResourceStateIndex(Mask); - Resources[Index] = - llvm::make_unique(*SM.getProcResource(I), I, Mask); - Strategies[Index] = getStrategyFor(*Resources[Index]); - } -} - -void ResourceManager::setCustomStrategyImpl(std::unique_ptr S, - uint64_t ResourceMask) { - unsigned Index = getResourceStateIndex(ResourceMask); - assert(Index < Resources.size() && "Invalid processor resource index!"); - assert(S && "Unexpected null strategy in input!"); - Strategies[Index] = std::move(S); -} - -unsigned ResourceManager::resolveResourceMask(uint64_t Mask) const { - return Resources[getResourceStateIndex(Mask)]->getProcResourceID(); -} - -unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const { - return Resources[getResourceStateIndex(ResourceID)]->getNumUnits(); -} - -// Returns the actual resource consumed by this Use. -// First, is the primary resource ID. -// Second, is the specific sub-resource ID. -ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) { - unsigned Index = getResourceStateIndex(ResourceID); - ResourceState &RS = *Resources[Index]; - assert(RS.isReady() && "No available units to select!"); - - // Special case where RS is not a group, and it only declares a single - // resource unit. - if (!RS.isAResourceGroup() && RS.getNumUnits() == 1) - return std::make_pair(ResourceID, RS.getReadyMask()); - - uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask()); - if (RS.isAResourceGroup()) - return selectPipe(SubResourceID); - return std::make_pair(ResourceID, SubResourceID); -} - -void ResourceManager::use(const ResourceRef &RR) { - // Mark the sub-resource referenced by RR as used. 
- ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; - RS.markSubResourceAsUsed(RR.second); - // If there are still available units in RR.first, - // then we are done. - if (RS.isReady()) - return; - - // Notify to other resources that RR.first is no longer available. - for (std::unique_ptr &Res : Resources) { - ResourceState &Current = *Res; - if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) - continue; - - if (Current.containsResource(RR.first)) { - unsigned Index = getResourceStateIndex(Current.getResourceMask()); - Current.markSubResourceAsUsed(RR.first); - Strategies[Index]->used(RR.first); - } - } -} - -void ResourceManager::release(const ResourceRef &RR) { - ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; - bool WasFullyUsed = !RS.isReady(); - RS.releaseSubResource(RR.second); - if (!WasFullyUsed) - return; - - for (std::unique_ptr &Res : Resources) { - ResourceState &Current = *Res; - if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) - continue; - - if (Current.containsResource(RR.first)) - Current.releaseSubResource(RR.first); - } -} - -ResourceStateEvent -ResourceManager::canBeDispatched(ArrayRef Buffers) const { - ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; - for (uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - Result = RS.isBufferAvailable(); - if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) - break; - } - return Result; -} - -void ResourceManager::reserveBuffers(ArrayRef Buffers) { - for (const uint64_t Buffer : Buffers) { - ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; - assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); - RS.reserveBuffer(); - - if (RS.isADispatchHazard()) { - assert(!RS.isReserved()); - RS.setReserved(); - } - } -} - -void ResourceManager::releaseBuffers(ArrayRef Buffers) { - for (const uint64_t R : Buffers) - 
Resources[getResourceStateIndex(R)]->releaseBuffer(); -} - -bool ResourceManager::canBeIssued(const InstrDesc &Desc) const { - return std::all_of(Desc.Resources.begin(), Desc.Resources.end(), - [&](const std::pair &E) { - unsigned NumUnits = - E.second.isReserved() ? 0U : E.second.NumUnits; - unsigned Index = getResourceStateIndex(E.first); - return Resources[Index]->isReady(NumUnits); - }); -} - -// Returns true if all resources are in-order, and there is at least one -// resource which is a dispatch hazard (BufferSize = 0). -bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const { - if (!canBeIssued(Desc)) - return false; - bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) { - unsigned Index = getResourceStateIndex(BufferMask); - const ResourceState &Resource = *Resources[Index]; - return Resource.isInOrder() || Resource.isADispatchHazard(); - }); - if (!AllInOrderResources) - return false; - - return any_of(Desc.Buffers, [&](uint64_t BufferMask) { - return Resources[getResourceStateIndex(BufferMask)]->isADispatchHazard(); - }); -} - -void ResourceManager::issueInstruction( - const InstrDesc &Desc, - SmallVectorImpl> &Pipes) { - for (const std::pair &R : Desc.Resources) { - const CycleSegment &CS = R.second.CS; - if (!CS.size()) { - releaseResource(R.first); - continue; - } - - assert(CS.begin() == 0 && "Invalid {Start, End} cycles!"); - if (!R.second.isReserved()) { - ResourceRef Pipe = selectPipe(R.first); - use(Pipe); - BusyResources[Pipe] += CS.size(); - // Replace the resource mask with a valid processor resource index. - const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)]; - Pipe.first = RS.getProcResourceID(); - Pipes.emplace_back( - std::pair(Pipe, static_cast(CS.size()))); - } else { - assert((countPopulation(R.first) > 1) && "Expected a group!"); - // Mark this group as reserved. 
- assert(R.second.isReserved()); - reserveResource(R.first); - BusyResources[ResourceRef(R.first, R.first)] += CS.size(); - } - } -} - -void ResourceManager::cycleEvent(SmallVectorImpl &ResourcesFreed) { - for (std::pair &BR : BusyResources) { - if (BR.second) - BR.second--; - if (!BR.second) { - // Release this resource. - const ResourceRef &RR = BR.first; - - if (countPopulation(RR.first) == 1) - release(RR); - - releaseResource(RR.first); - ResourcesFreed.push_back(RR); - } - } - - for (const ResourceRef &RF : ResourcesFreed) - BusyResources.erase(RF); -} - -void ResourceManager::reserveResource(uint64_t ResourceID) { - ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; - assert(!Resource.isReserved()); - Resource.setReserved(); -} - -void ResourceManager::releaseResource(uint64_t ResourceID) { - ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; - Resource.clearReserved(); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/RetireControlUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/RetireControlUnit.h +++ llvm/trunk/tools/llvm-mca/RetireControlUnit.h @@ -1,97 +0,0 @@ -//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file simulates the hardware responsible for retiring instructions. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H -#define LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H - -#include "HardwareUnit.h" -#include "Instruction.h" -#include "llvm/MC/MCSchedule.h" -#include - -namespace mca { - -/// This class tracks which instructions are in-flight (i.e., dispatched but not -/// retired) in the OoO backend. -// -/// This class checks on every cycle if/which instructions can be retired. -/// Instructions are retired in program order. -/// In the event of an instruction being retired, the pipeline that owns -/// this RetireControlUnit (RCU) gets notified. -/// -/// On instruction retired, register updates are all architecturally -/// committed, and any physicall registers previously allocated for the -/// retired instruction are freed. -struct RetireControlUnit : public HardwareUnit { - // A RUToken is created by the RCU for every instruction dispatched to the - // schedulers. These "tokens" are managed by the RCU in its token Queue. - // - // On every cycle ('cycleEvent'), the RCU iterates through the token queue - // looking for any token with its 'Executed' flag set. If a token has that - // flag set, then the instruction has reached the write-back stage and will - // be retired by the RCU. - // - // 'NumSlots' represents the number of entries consumed by the instruction in - // the reorder buffer. Those entries will become available again once the - // instruction is retired. - // - // Note that the size of the reorder buffer is defined by the scheduling - // model via field 'NumMicroOpBufferSize'. - struct RUToken { - InstRef IR; - unsigned NumSlots; // Slots reserved to this instruction. - bool Executed; // True if the instruction is past the WB stage. - }; - -private: - unsigned NextAvailableSlotIdx; - unsigned CurrentInstructionSlotIdx; - unsigned AvailableSlots; - unsigned MaxRetirePerCycle; // 0 means no limit. 
- std::vector Queue; - -public: - RetireControlUnit(const llvm::MCSchedModel &SM); - - bool isEmpty() const { return AvailableSlots == Queue.size(); } - bool isAvailable(unsigned Quantity = 1) const { - // Some instructions may declare a number of uOps which exceeds the size - // of the reorder buffer. To avoid problems, cap the amount of slots to - // the size of the reorder buffer. - Quantity = std::min(Quantity, static_cast(Queue.size())); - return AvailableSlots >= Quantity; - } - - unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; } - - // Reserves a number of slots, and returns a new token. - unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps); - - // Return the current token from the RCU's circular token queue. - const RUToken &peekCurrentToken() const; - - // Advance the pointer to the next token in the circular token queue. - void consumeCurrentToken(); - - // Update the RCU token to represent the executed state. - void onInstructionExecuted(unsigned TokenID); - -#ifndef NDEBUG - void dump() const; -#endif -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H Index: llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp +++ llvm/trunk/tools/llvm-mca/RetireControlUnit.cpp @@ -1,87 +0,0 @@ -//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file simulates the hardware responsible for retiring instructions. 
-/// -//===----------------------------------------------------------------------===// - -#include "RetireControlUnit.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM) - : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), - AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { - // Check if the scheduling model provides extra information about the machine - // processor. If so, then use that information to set the reorder buffer size - // and the maximum number of instructions retired per cycle. - if (SM.hasExtraProcessorInfo()) { - const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); - if (EPI.ReorderBufferSize) - AvailableSlots = EPI.ReorderBufferSize; - MaxRetirePerCycle = EPI.MaxRetirePerCycle; - } - - assert(AvailableSlots && "Invalid reorder buffer size!"); - Queue.resize(AvailableSlots); -} - -// Reserves a number of slots, and returns a new token. -unsigned RetireControlUnit::reserveSlot(const InstRef &IR, - unsigned NumMicroOps) { - assert(isAvailable(NumMicroOps)); - unsigned NormalizedQuantity = - std::min(NumMicroOps, static_cast(Queue.size())); - // Zero latency instructions may have zero mOps. Artificially bump this - // value to 1. Although zero latency instructions don't consume scheduler - // resources, they still consume one slot in the retire queue. 
- NormalizedQuantity = std::max(NormalizedQuantity, 1U); - unsigned TokenID = NextAvailableSlotIdx; - Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; - NextAvailableSlotIdx += NormalizedQuantity; - NextAvailableSlotIdx %= Queue.size(); - AvailableSlots -= NormalizedQuantity; - return TokenID; -} - -const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { - return Queue[CurrentInstructionSlotIdx]; -} - -void RetireControlUnit::consumeCurrentToken() { - const RetireControlUnit::RUToken &Current = peekCurrentToken(); - assert(Current.NumSlots && "Reserved zero slots?"); - assert(Current.IR.isValid() && "Invalid RUToken in the RCU queue."); - - // Update the slot index to be the next item in the circular queue. - CurrentInstructionSlotIdx += Current.NumSlots; - CurrentInstructionSlotIdx %= Queue.size(); - AvailableSlots += Current.NumSlots; -} - -void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { - assert(Queue.size() > TokenID); - assert(Queue[TokenID].Executed == false && Queue[TokenID].IR.isValid()); - Queue[TokenID].Executed = true; -} - -#ifndef NDEBUG -void RetireControlUnit::dump() const { - dbgs() << "Retire Unit: { Total Slots=" << Queue.size() - << ", Available Slots=" << AvailableSlots << " }\n"; -} -#endif - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/RetireStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/RetireStage.h +++ llvm/trunk/tools/llvm-mca/RetireStage.h @@ -1,46 +0,0 @@ -//===---------------------- RetireStage.h -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the retire stage of a default instruction pipeline. 
-/// The RetireStage represents the process logic that interacts with the -/// simulated RetireControlUnit hardware. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H - -#include "RegisterFile.h" -#include "RetireControlUnit.h" -#include "Stage.h" - -namespace mca { - -class RetireStage final : public Stage { - // Owner will go away when we move listeners/eventing to the stages. - RetireControlUnit &RCU; - RegisterFile &PRF; - - RetireStage(const RetireStage &Other) = delete; - RetireStage &operator=(const RetireStage &Other) = delete; - -public: - RetireStage(RetireControlUnit &R, RegisterFile &F) - : Stage(), RCU(R), PRF(F) {} - - bool hasWorkToComplete() const override { return !RCU.isEmpty(); } - llvm::Error cycleStart() override; - llvm::Error execute(InstRef &IR) override; - void notifyInstructionRetired(const InstRef &IR); -}; - -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H Index: llvm/trunk/tools/llvm-mca/RetireStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/RetireStage.cpp +++ llvm/trunk/tools/llvm-mca/RetireStage.cpp @@ -1,62 +0,0 @@ -//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines the retire stage of an instruction pipeline. -/// The RetireStage represents the process logic that interacts with the -/// simulated RetireControlUnit hardware. 
-/// -//===----------------------------------------------------------------------===// - -#include "RetireStage.h" -#include "HWEventListener.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "llvm-mca" - -namespace mca { - -llvm::Error RetireStage::cycleStart() { - if (RCU.isEmpty()) - return llvm::ErrorSuccess(); - - const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle(); - unsigned NumRetired = 0; - while (!RCU.isEmpty()) { - if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) - break; - const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken(); - if (!Current.Executed) - break; - RCU.consumeCurrentToken(); - notifyInstructionRetired(Current.IR); - NumRetired++; - } - - return llvm::ErrorSuccess(); -} - -llvm::Error RetireStage::execute(InstRef &IR) { - RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); - return llvm::ErrorSuccess(); -} - -void RetireStage::notifyInstructionRetired(const InstRef &IR) { - LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n'); - llvm::SmallVector FreedRegs(PRF.getNumRegisterFiles()); - const Instruction &Inst = *IR.getInstruction(); - const InstrDesc &Desc = Inst.getDesc(); - - bool ShouldFreeRegs = !(Desc.isZeroLatency() && Inst.isDependencyBreaking()); - for (const std::unique_ptr &WS : Inst.getDefs()) - PRF.removeRegisterWrite(*WS.get(), FreedRegs, ShouldFreeRegs); - notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs)); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/Scheduler.h =================================================================== --- llvm/trunk/tools/llvm-mca/Scheduler.h +++ llvm/trunk/tools/llvm-mca/Scheduler.h @@ -1,212 +0,0 @@ -//===--------------------- Scheduler.h ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// A scheduler for Processor Resource Units and Processor Resource Groups. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H -#define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H - -#include "HardwareUnit.h" -#include "LSUnit.h" -#include "ResourceManager.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -class SchedulerStrategy { -public: - SchedulerStrategy() = default; - virtual ~SchedulerStrategy(); - - /// Returns true if Lhs should take priority over Rhs. - /// - /// This method is used by class Scheduler to select the "best" ready - /// instruction to issue to the underlying pipelines. - virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0; -}; - -/// Default instruction selection strategy used by class Scheduler. -class DefaultSchedulerStrategy : public SchedulerStrategy { - /// This method ranks instructions based on their age, and the number of known - /// users. The lower the rank value, the better. - int computeRank(const InstRef &Lhs) const { - return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers(); - } - -public: - DefaultSchedulerStrategy() = default; - virtual ~DefaultSchedulerStrategy(); - - bool compare(const InstRef &Lhs, const InstRef &Rhs) const override { - int LhsRank = computeRank(Lhs); - int RhsRank = computeRank(Rhs); - - /// Prioritize older instructions over younger instructions to minimize the - /// pressure on the reorder buffer. - if (LhsRank == RhsRank) - return Lhs.getSourceIndex() < Rhs.getSourceIndex(); - return LhsRank < RhsRank; - } -}; - -/// Class Scheduler is responsible for issuing instructions to pipeline -/// resources. -/// -/// Internally, it delegates to a ResourceManager the management of processor -/// resources. 
This class is also responsible for tracking the progress of -/// instructions from the dispatch stage, until the write-back stage. -/// -/// An instruction dispatched to the Scheduler is initially placed into either -/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input -/// operands. -/// -/// An instruction is moved from the WaitSet to the ReadySet when register -/// operands become available, and all memory dependencies are met. -/// Instructions that are moved from the WaitSet to the ReadySet transition -/// in state from 'IS_AVAILABLE' to 'IS_READY'. -/// -/// On every cycle, the Scheduler checks if it can promote instructions from the -/// WaitSet to the ReadySet. -/// -/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued -/// to a (one or more) pipeline(s). This event also causes an instruction state -/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction -/// leaves the IssuedSet when it reaches the write-back stage. -class Scheduler : public HardwareUnit { - LSUnit *LSU; - - // Instruction selection strategy for this Scheduler. - std::unique_ptr Strategy; - - // Hardware resources that are managed by this scheduler. - std::unique_ptr Resources; - - std::vector WaitSet; - std::vector ReadySet; - std::vector IssuedSet; - - /// Verify the given selection strategy and set the Strategy member - /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is - /// used. - void initializeStrategy(std::unique_ptr S); - - /// Issue an instruction without updating the ready queue. - void issueInstructionImpl( - InstRef &IR, - llvm::SmallVectorImpl> &Pipes); - - // Identify instructions that have finished executing, and remove them from - // the IssuedSet. References to executed instructions are added to input - // vector 'Executed'. - void updateIssuedSet(llvm::SmallVectorImpl &Executed); - - // Try to promote instructions from WaitSet to ReadySet. 
- // Add promoted instructions to the 'Ready' vector in input. - void promoteToReadySet(llvm::SmallVectorImpl &Ready); - -public: - Scheduler(const llvm::MCSchedModel &Model, LSUnit *Lsu) - : LSU(Lsu), Resources(llvm::make_unique(Model)) { - initializeStrategy(nullptr); - } - Scheduler(const llvm::MCSchedModel &Model, LSUnit *Lsu, - std::unique_ptr SelectStrategy) - : LSU(Lsu), Resources(llvm::make_unique(Model)) { - initializeStrategy(std::move(SelectStrategy)); - } - Scheduler(std::unique_ptr RM, LSUnit *Lsu, - std::unique_ptr SelectStrategy) - : LSU(Lsu), Resources(std::move(RM)) { - initializeStrategy(std::move(SelectStrategy)); - } - - // Stalls generated by the scheduler. - enum Status { - SC_AVAILABLE, - SC_LOAD_QUEUE_FULL, - SC_STORE_QUEUE_FULL, - SC_BUFFERS_FULL, - SC_DISPATCH_GROUP_STALL, - }; - - /// Check if the instruction in 'IR' can be dispatched and returns an answer - /// in the form of a Status value. - /// - /// The DispatchStage is responsible for querying the Scheduler before - /// dispatching new instructions. This routine is used for performing such - /// a query. If the instruction 'IR' can be dispatched, then true is - /// returned, otherwise false is returned with Event set to the stall type. - /// Internally, it also checks if the load/store unit is available. - Status isAvailable(const InstRef &IR) const; - - /// Reserves buffer and LSUnit queue resources that are necessary to issue - /// this instruction. - /// - /// Returns true if instruction IR is ready to be issued to the underlying - /// pipelines. Note that this operation cannot fail; it assumes that a - /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`. - void dispatch(const InstRef &IR); - - /// Returns true if IR is ready to be executed by the underlying pipelines. - /// This method assumes that IR has been previously dispatched. 
- bool isReady(const InstRef &IR) const; - - /// Issue an instruction and populates a vector of used pipeline resources, - /// and a vector of instructions that transitioned to the ready state as a - /// result of this event. - void - issueInstruction(InstRef &IR, - llvm::SmallVectorImpl> &Used, - llvm::SmallVectorImpl &Ready); - - /// Returns true if IR has to be issued immediately, or if IR is a zero - /// latency instruction. - bool mustIssueImmediately(const InstRef &IR) const; - - /// This routine notifies the Scheduler that a new cycle just started. - /// - /// It notifies the underlying ResourceManager that a new cycle just started. - /// Vector `Freed` is populated with resourceRef related to resources that - /// have changed in state, and that are now available to new instructions. - /// Instructions executed are added to vector Executed, while vector Ready is - /// populated with instructions that have become ready in this new cycle. - void cycleEvent(llvm::SmallVectorImpl &Freed, - llvm::SmallVectorImpl &Ready, - llvm::SmallVectorImpl &Executed); - - /// Convert a resource mask into a valid llvm processor resource identifier. - unsigned getResourceID(uint64_t Mask) const { - return Resources->resolveResourceMask(Mask); - } - - /// Select the next instruction to issue from the ReadySet. Returns an invalid - /// instruction reference if there are no ready instructions, or if processor - /// resources are not available. - InstRef select(); - -#ifndef NDEBUG - // Update the ready queues. - void dump() const; - - // This routine performs a sanity check. This routine should only be called - // when we know that 'IR' is not in the scheduler's instruction queues. 
- void sanityCheck(const InstRef &IR) const { - assert(llvm::find(WaitSet, IR) == WaitSet.end()); - assert(llvm::find(ReadySet, IR) == ReadySet.end()); - assert(llvm::find(IssuedSet, IR) == IssuedSet.end()); - } -#endif // !NDEBUG -}; -} // namespace mca - -#endif // LLVM_TOOLS_LLVM_MCA_SCHEDULER_H Index: llvm/trunk/tools/llvm-mca/Scheduler.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Scheduler.cpp +++ llvm/trunk/tools/llvm-mca/Scheduler.cpp @@ -1,244 +0,0 @@ -//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// A scheduler for processor resource units and processor resource groups. -// -//===----------------------------------------------------------------------===// - -#include "Scheduler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -namespace mca { - -using namespace llvm; - -#define DEBUG_TYPE "llvm-mca" - -void Scheduler::initializeStrategy(std::unique_ptr S) { - // Ensure we have a valid (non-null) strategy object. - Strategy = S ? std::move(S) : llvm::make_unique(); -} - -// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. 
-SchedulerStrategy::~SchedulerStrategy() = default; -DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; - -#ifndef NDEBUG -void Scheduler::dump() const { - dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; - dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; - dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; - Resources->dump(); -} -#endif - -Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - - switch (Resources->canBeDispatched(Desc.Buffers)) { - case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: - return Scheduler::SC_BUFFERS_FULL; - case ResourceStateEvent::RS_RESERVED: - return Scheduler::SC_DISPATCH_GROUP_STALL; - case ResourceStateEvent::RS_BUFFER_AVAILABLE: - break; - } - - // Give lower priority to LSUnit stall events. - switch (LSU->isAvailable(IR)) { - case LSUnit::LSU_LQUEUE_FULL: - return Scheduler::SC_LOAD_QUEUE_FULL; - case LSUnit::LSU_SQUEUE_FULL: - return Scheduler::SC_STORE_QUEUE_FULL; - case LSUnit::LSU_AVAILABLE: - return Scheduler::SC_AVAILABLE; - } - - llvm_unreachable("Don't know how to process this LSU state result!"); -} - -void Scheduler::issueInstructionImpl( - InstRef &IR, - SmallVectorImpl> &UsedResources) { - Instruction *IS = IR.getInstruction(); - const InstrDesc &D = IS->getDesc(); - - // Issue the instruction and collect all the consumed resources - // into a vector. That vector is then used to notify the listener. - Resources->issueInstruction(D, UsedResources); - - // Notify the instruction that it started executing. - // This updates the internal state of each write. - IS->execute(); - - if (IS->isExecuting()) - IssuedSet.emplace_back(IR); - else if (IS->isExecuted()) - LSU->onInstructionExecuted(IR); -} - -// Release the buffered resources and issue the instruction. 
-void Scheduler::issueInstruction( - InstRef &IR, SmallVectorImpl> &UsedResources, - SmallVectorImpl &ReadyInstructions) { - const Instruction &Inst = *IR.getInstruction(); - bool HasDependentUsers = Inst.hasDependentUsers(); - - Resources->releaseBuffers(Inst.getDesc().Buffers); - issueInstructionImpl(IR, UsedResources); - // Instructions that have been issued during this cycle might have unblocked - // other dependent instructions. Dependent instructions may be issued during - // this same cycle if operands have ReadAdvance entries. Promote those - // instructions to the ReadySet and notify the caller that those are ready. - if (HasDependentUsers) - promoteToReadySet(ReadyInstructions); -} - -void Scheduler::promoteToReadySet(SmallVectorImpl &Ready) { - // Scan the set of waiting instructions and promote them to the - // ready queue if operands are all ready. - unsigned RemovedElements = 0; - for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { - InstRef &IR = *I; - if (!IR.isValid()) - break; - - // Check if this instruction is now ready. In case, force - // a transition in state using method 'update()'. - Instruction &IS = *IR.getInstruction(); - if (!IS.isReady()) - IS.update(); - - // Check if there are still unsolved data dependencies. - if (!isReady(IR)) { - ++I; - continue; - } - - Ready.emplace_back(IR); - ReadySet.emplace_back(IR); - - IR.invalidate(); - ++RemovedElements; - std::iter_swap(I, E - RemovedElements); - } - - WaitSet.resize(WaitSet.size() - RemovedElements); -} - -InstRef Scheduler::select() { - unsigned QueueIndex = ReadySet.size(); - for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { - const InstRef &IR = ReadySet[I]; - if (QueueIndex == ReadySet.size() || - Strategy->compare(IR, ReadySet[QueueIndex])) { - const InstrDesc &D = IR.getInstruction()->getDesc(); - if (Resources->canBeIssued(D)) - QueueIndex = I; - } - } - - if (QueueIndex == ReadySet.size()) - return InstRef(); - - // We found an instruction to issue. 
- InstRef IR = ReadySet[QueueIndex]; - std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); - ReadySet.pop_back(); - return IR; -} - -void Scheduler::updateIssuedSet(SmallVectorImpl &Executed) { - unsigned RemovedElements = 0; - for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { - InstRef &IR = *I; - if (!IR.isValid()) - break; - Instruction &IS = *IR.getInstruction(); - if (!IS.isExecuted()) { - LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR - << " is still executing.\n"); - ++I; - continue; - } - - // Instruction IR has completed execution. - LSU->onInstructionExecuted(IR); - Executed.emplace_back(IR); - ++RemovedElements; - IR.invalidate(); - std::iter_swap(I, E - RemovedElements); - } - - IssuedSet.resize(IssuedSet.size() - RemovedElements); -} - -void Scheduler::cycleEvent(SmallVectorImpl &Freed, - SmallVectorImpl &Executed, - SmallVectorImpl &Ready) { - // Release consumed resources. - Resources->cycleEvent(Freed); - - // Propagate the cycle event to the 'Issued' and 'Wait' sets. - for (InstRef &IR : IssuedSet) - IR.getInstruction()->cycleEvent(); - - updateIssuedSet(Executed); - - for (InstRef &IR : WaitSet) - IR.getInstruction()->cycleEvent(); - - promoteToReadySet(Ready); -} - -bool Scheduler::mustIssueImmediately(const InstRef &IR) const { - // Instructions that use an in-order dispatch/issue processor resource must be - // issued immediately to the pipeline(s). Any other in-order buffered - // resources (i.e. BufferSize=1) is consumed. - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc); -} - -void Scheduler::dispatch(const InstRef &IR) { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - Resources->reserveBuffers(Desc.Buffers); - - // If necessary, reserve queue entries in the load-store unit (LSU). 
- bool IsMemOp = Desc.MayLoad || Desc.MayStore; - if (IsMemOp) - LSU->dispatch(IR); - - if (!isReady(IR)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); - WaitSet.push_back(IR); - return; - } - - // Don't add a zero-latency instruction to the Ready queue. - // A zero-latency instruction doesn't consume any scheduler resources. That is - // because it doesn't need to be executed, and it is often removed at register - // renaming stage. For example, register-register moves are often optimized at - // register renaming stage by simply updating register aliases. On some - // targets, zero-idiom instructions (for example: a xor that clears the value - // of a register) are treated specially, and are often eliminated at register - // renaming stage. - if (!mustIssueImmediately(IR)) { - LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); - ReadySet.push_back(IR); - } -} - -bool Scheduler::isReady(const InstRef &IR) const { - const InstrDesc &Desc = IR.getInstruction()->getDesc(); - bool IsMemOp = Desc.MayLoad || Desc.MayStore; - return IR.getInstruction()->isReady() && (!IsMemOp || LSU->isReady(IR)); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/SourceMgr.h =================================================================== --- llvm/trunk/tools/llvm-mca/SourceMgr.h +++ llvm/trunk/tools/llvm-mca/SourceMgr.h @@ -1,64 +0,0 @@ -//===--------------------- SourceMgr.h --------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements class SourceMgr. Class SourceMgr abstracts the input -/// code sequence (a sequence of MCInst), and assings unique identifiers to -/// every instruction in the sequence. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H -#define LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H - -#include "llvm/MC/MCInst.h" -#include - -namespace mca { - -typedef std::pair SourceRef; - -class SourceMgr { - using InstVec = std::vector>; - const InstVec &Sequence; - unsigned Current; - unsigned Iterations; - static const unsigned DefaultIterations = 100; - -public: - SourceMgr(const InstVec &MCInstSequence, unsigned NumIterations) - : Sequence(MCInstSequence), Current(0), - Iterations(NumIterations ? NumIterations : DefaultIterations) {} - - unsigned getCurrentIteration() const { return Current / Sequence.size(); } - unsigned getNumIterations() const { return Iterations; } - unsigned size() const { return Sequence.size(); } - const InstVec &getSequence() const { return Sequence; } - - bool hasNext() const { return Current < (Iterations * size()); } - void updateNext() { Current++; } - - const SourceRef peekNext() const { - assert(hasNext() && "Already at end of sequence!"); - unsigned Index = getCurrentInstructionIndex(); - return SourceRef(Current, Sequence[Index].get()); - } - - unsigned getCurrentInstructionIndex() const { - return Current % Sequence.size(); - } - - const llvm::MCInst &getMCInstFromIndex(unsigned Index) const { - return *Sequence[Index % size()]; - } - - bool isEmpty() const { return size() == 0; } -}; -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/Stage.h =================================================================== --- llvm/trunk/tools/llvm-mca/Stage.h +++ llvm/trunk/tools/llvm-mca/Stage.h @@ -1,86 +0,0 @@ -//===---------------------- Stage.h -----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a stage. -/// A chain of stages compose an instruction pipeline. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_STAGE_H -#define LLVM_TOOLS_LLVM_MCA_STAGE_H - -#include "HWEventListener.h" -#include "llvm/Support/Error.h" -#include - -namespace mca { - -class InstRef; - -class Stage { - Stage *NextInSequence; - std::set Listeners; - - Stage(const Stage &Other) = delete; - Stage &operator=(const Stage &Other) = delete; - -protected: - const std::set &getListeners() const { return Listeners; } - -public: - Stage() : NextInSequence(nullptr) {} - virtual ~Stage(); - - /// Returns true if it can execute IR during this cycle. - virtual bool isAvailable(const InstRef &IR) const { return true; } - - /// Returns true if some instructions are still executing this stage. - virtual bool hasWorkToComplete() const = 0; - - /// Called once at the start of each cycle. This can be used as a setup - /// phase to prepare for the executions during the cycle. - virtual llvm::Error cycleStart() { return llvm::ErrorSuccess(); } - - /// Called once at the end of each cycle. - virtual llvm::Error cycleEnd() { return llvm::ErrorSuccess(); } - - /// The primary action that this stage performs on instruction IR. - virtual llvm::Error execute(InstRef &IR) = 0; - - void setNextInSequence(Stage *NextStage) { - assert(!NextInSequence && "This stage already has a NextInSequence!"); - NextInSequence = NextStage; - } - - bool checkNextStage(const InstRef &IR) const { - return NextInSequence && NextInSequence->isAvailable(IR); - } - - /// Called when an instruction is ready to move the next pipeline stage. - /// - /// Stages are responsible for moving instructions to their immediate - /// successor stages. 
- llvm::Error moveToTheNextStage(InstRef &IR) { - assert(checkNextStage(IR) && "Next stage is not ready!"); - return NextInSequence->execute(IR); - } - - /// Add a listener to receive callbacks during the execution of this stage. - void addListener(HWEventListener *Listener); - - /// Notify listeners of a particular hardware event. - template void notifyEvent(const EventT &Event) const { - for (HWEventListener *Listener : Listeners) - Listener->onEvent(Event); - } -}; - -} // namespace mca -#endif // LLVM_TOOLS_LLVM_MCA_STAGE_H Index: llvm/trunk/tools/llvm-mca/Stage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Stage.cpp +++ llvm/trunk/tools/llvm-mca/Stage.cpp @@ -1,27 +0,0 @@ -//===---------------------- Stage.cpp ---------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file defines a stage. -/// A chain of stages compose an instruction pipeline. -/// -//===----------------------------------------------------------------------===// - -#include "Stage.h" - -namespace mca { - -// Pin the vtable here in the implementation file. -Stage::~Stage() = default; - -void Stage::addListener(HWEventListener *Listener) { - Listeners.insert(Listener); -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/Support.h =================================================================== --- llvm/trunk/tools/llvm-mca/Support.h +++ llvm/trunk/tools/llvm-mca/Support.h @@ -1,58 +0,0 @@ -//===--------------------- Support.h ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// \file -/// -/// Helper functions used by various pipeline components. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TOOLS_LLVM_MCA_SUPPORT_H -#define LLVM_TOOLS_LLVM_MCA_SUPPORT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -/// Populates vector Masks with processor resource masks. -/// -/// The number of bits set in a mask depends on the processor resource type. -/// Each processor resource mask has at least one bit set. For groups, the -/// number of bits set in the mask is equal to the cardinality of the group plus -/// one. Excluding the most significant bit, the remaining bits in the mask -/// identify processor resources that are part of the group. -/// -/// Example: -/// -/// ResourceA -- Mask: 0b001 -/// ResourceB -- Mask: 0b010 -/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111 -/// -/// ResourceAB is a processor resource group containing ResourceA and ResourceB. -/// Each resource mask uniquely identifies a resource; both ResourceA and -/// ResourceB only have one bit set. -/// ResourceAB is a group; excluding the most significant bit in the mask, the -/// remaining bits identify the composition of the group. -/// -/// Resource masks are used by the ResourceManager to solve set membership -/// problems with simple bit manipulation operations. -void computeProcResourceMasks(const llvm::MCSchedModel &SM, - llvm::SmallVectorImpl &Masks); - -/// Compute the reciprocal block throughput from a set of processor resource -/// cycles. The reciprocal block throughput is computed as the MAX between: -/// - NumMicroOps / DispatchWidth -/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource). 
-double computeBlockRThroughput(const llvm::MCSchedModel &SM, - unsigned DispatchWidth, unsigned NumMicroOps, - llvm::ArrayRef ProcResourceUsage); -} // namespace mca - -#endif Index: llvm/trunk/tools/llvm-mca/Support.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/Support.cpp +++ llvm/trunk/tools/llvm-mca/Support.cpp @@ -1,79 +0,0 @@ -//===--------------------- Support.cpp --------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// -/// This file implements a few helper functions used by various pipeline -/// components. -/// -//===----------------------------------------------------------------------===// - -#include "Support.h" -#include "llvm/MC/MCSchedule.h" - -namespace mca { - -using namespace llvm; - -void computeProcResourceMasks(const MCSchedModel &SM, - SmallVectorImpl &Masks) { - unsigned ProcResourceID = 0; - - // Create a unique bitmask for every processor resource unit. - // Skip resource at index 0, since it always references 'InvalidUnit'. - Masks.resize(SM.getNumProcResourceKinds()); - for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { - const MCProcResourceDesc &Desc = *SM.getProcResource(I); - if (Desc.SubUnitsIdxBegin) - continue; - Masks[I] = 1ULL << ProcResourceID; - ProcResourceID++; - } - - // Create a unique bitmask for every processor resource group. 
- for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) { - const MCProcResourceDesc &Desc = *SM.getProcResource(I); - if (!Desc.SubUnitsIdxBegin) - continue; - Masks[I] = 1ULL << ProcResourceID; - for (unsigned U = 0; U < Desc.NumUnits; ++U) { - uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]]; - Masks[I] |= OtherMask; - } - ProcResourceID++; - } -} - -double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth, - unsigned NumMicroOps, - ArrayRef ProcResourceUsage) { - // The block throughput is bounded from above by the hardware dispatch - // throughput. That is because the DispatchWidth is an upper bound on the - // number of opcodes that can be part of a single dispatch group. - double Max = static_cast(NumMicroOps) / DispatchWidth; - - // The block throughput is also limited by the amount of hardware parallelism. - // The number of available resource units affects the resource pressure - // distribution, as well as how many blocks can be executed every cycle. - for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) { - unsigned ResourceCycles = ProcResourceUsage[I]; - if (!ResourceCycles) - continue; - - const MCProcResourceDesc &MCDesc = *SM.getProcResource(I); - double Throughput = static_cast(ResourceCycles) / MCDesc.NumUnits; - Max = std::max(Max, Throughput); - } - - // The block reciprocal throughput is computed as the MAX of: - // - (NumMicroOps / DispatchWidth) - // - (NumUnits / ResourceCycles) for every consumed processor resource. 
- return Max; -} - -} // namespace mca Index: llvm/trunk/tools/llvm-mca/include/Context.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Context.h +++ llvm/trunk/tools/llvm-mca/include/Context.h @@ -0,0 +1,68 @@ +//===---------------------------- Context.h ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a class for holding ownership of various simulated +/// hardware units. A Context also provides a utility routine for constructing +/// a default out-of-order pipeline with fetch, dispatch, execute, and retire +/// stages. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_CONTEXT_H +#define LLVM_TOOLS_LLVM_MCA_CONTEXT_H +#include "HardwareUnits/HardwareUnit.h" +#include "InstrBuilder.h" +#include "Pipeline.h" +#include "SourceMgr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include + +namespace mca { + +/// This is a convenience struct to hold the parameters necessary for creating +/// the pre-built "default" out-of-order pipeline. 
+struct PipelineOptions { + PipelineOptions(unsigned DW, unsigned RFS, unsigned LQS, unsigned SQS, + bool NoAlias) + : DispatchWidth(DW), RegisterFileSize(RFS), LoadQueueSize(LQS), + StoreQueueSize(SQS), AssumeNoAlias(NoAlias) {} + unsigned DispatchWidth; + unsigned RegisterFileSize; + unsigned LoadQueueSize; + unsigned StoreQueueSize; + bool AssumeNoAlias; +}; + +class Context { + llvm::SmallVector, 4> Hardware; + const llvm::MCRegisterInfo &MRI; + const llvm::MCSubtargetInfo &STI; + +public: + Context(const llvm::MCRegisterInfo &R, const llvm::MCSubtargetInfo &S) + : MRI(R), STI(S) {} + Context(const Context &C) = delete; + Context &operator=(const Context &C) = delete; + + void addHardwareUnit(std::unique_ptr H) { + Hardware.push_back(std::move(H)); + } + + /// Construct a basic pipeline for simulating an out-of-order pipeline. + /// This pipeline consists of Fetch, Dispatch, Execute, and Retire stages. + std::unique_ptr createDefaultPipeline(const PipelineOptions &Opts, + InstrBuilder &IB, + SourceMgr &SrcMgr); +}; + +} // namespace mca +#endif // LLVM_TOOLS_LLVM_MCA_CONTEXT_H Index: llvm/trunk/tools/llvm-mca/include/HWEventListener.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HWEventListener.h +++ llvm/trunk/tools/llvm-mca/include/HWEventListener.h @@ -0,0 +1,141 @@ +//===----------------------- HWEventListener.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the main interface for hardware event listeners. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H +#define LLVM_TOOLS_LLVM_MCA_HWEVENTLISTENER_H + +#include "Instruction.h" +#include "llvm/ADT/ArrayRef.h" +#include + +namespace mca { + +// An HWInstructionEvent represents state changes of instructions that +// listeners might be interested in. Listeners can choose to ignore any event +// they are not interested in. +class HWInstructionEvent { +public: + // This is the list of event types that are shared by all targets, that + // generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent, + // ...) and generic Views can manipulate. + // Subtargets are free to define additional event types, that are goin to be + // handled by generic components as opaque values, but can still be + // emitted by subtarget-specific pipeline stages (e.g., ExecuteStage, + // DispatchStage, ...) and interpreted by subtarget-specific EventListener + // implementations. + enum GenericEventType { + Invalid = 0, + // Events generated by the Retire Control Unit. + Retired, + // Events generated by the Scheduler. + Ready, + Issued, + Executed, + // Events generated by the Dispatch logic. + Dispatched, + + LastGenericEventType, + }; + + HWInstructionEvent(unsigned type, const InstRef &Inst) + : Type(type), IR(Inst) {} + + // The event type. The exact meaning depends on the subtarget. + const unsigned Type; + + // The instruction this event was generated for. 
+ const InstRef &IR; +}; + +class HWInstructionIssuedEvent : public HWInstructionEvent { +public: + using ResourceRef = std::pair; + HWInstructionIssuedEvent(const InstRef &IR, + llvm::ArrayRef> UR) + : HWInstructionEvent(HWInstructionEvent::Issued, IR), UsedResources(UR) {} + + llvm::ArrayRef> UsedResources; +}; + +class HWInstructionDispatchedEvent : public HWInstructionEvent { +public: + HWInstructionDispatchedEvent(const InstRef &IR, llvm::ArrayRef Regs) + : HWInstructionEvent(HWInstructionEvent::Dispatched, IR), + UsedPhysRegs(Regs) {} + // Number of physical register allocated for this instruction. There is one + // entry per register file. + llvm::ArrayRef UsedPhysRegs; +}; + +class HWInstructionRetiredEvent : public HWInstructionEvent { +public: + HWInstructionRetiredEvent(const InstRef &IR, llvm::ArrayRef Regs) + : HWInstructionEvent(HWInstructionEvent::Retired, IR), + FreedPhysRegs(Regs) {} + // Number of register writes that have been architecturally committed. There + // is one entry per register file. + llvm::ArrayRef FreedPhysRegs; +}; + +// A HWStallEvent represents a pipeline stall caused by the lack of hardware +// resources. +class HWStallEvent { +public: + enum GenericEventType { + Invalid = 0, + // Generic stall events generated by the DispatchStage. + RegisterFileStall, + RetireControlUnitStall, + // Generic stall events generated by the Scheduler. + DispatchGroupStall, + SchedulerQueueFull, + LoadQueueFull, + StoreQueueFull, + LastGenericEvent + }; + + HWStallEvent(unsigned type, const InstRef &Inst) : Type(type), IR(Inst) {} + + // The exact meaning of the stall event type depends on the subtarget. + const unsigned Type; + + // The instruction this event was generated for. + const InstRef &IR; +}; + +class HWEventListener { +public: + // Generic events generated by the pipeline. 
+ virtual void onCycleBegin() {} + virtual void onCycleEnd() {} + + virtual void onEvent(const HWInstructionEvent &Event) {} + virtual void onEvent(const HWStallEvent &Event) {} + + using ResourceRef = std::pair; + virtual void onResourceAvailable(const ResourceRef &RRef) {} + + // Events generated by the Scheduler when buffered resources are + // consumed/freed. + virtual void onReservedBuffers(llvm::ArrayRef Buffers) {} + virtual void onReleasedBuffers(llvm::ArrayRef Buffers) {} + + virtual ~HWEventListener() {} + +private: + virtual void anchor(); +}; +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/HardwareUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/HardwareUnit.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/HardwareUnit.h @@ -0,0 +1,31 @@ +//===-------------------------- HardwareUnit.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a base class for describing a simulated hardware +/// unit. These units are used to construct a simulated backend. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H +#define LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H + +namespace mca { + +class HardwareUnit { + HardwareUnit(const HardwareUnit &H) = delete; + HardwareUnit &operator=(const HardwareUnit &H) = delete; + +public: + HardwareUnit() = default; + virtual ~HardwareUnit(); +}; + +} // namespace mca +#endif // LLVM_TOOLS_LLVM_MCA_HARDWAREUNIT_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/LSUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/LSUnit.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/LSUnit.h @@ -0,0 +1,161 @@ +//===------------------------- LSUnit.h --------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A Load/Store unit class that models load/store queues and that implements +/// a simple weak memory consistency model. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_LSUNIT_H +#define LLVM_TOOLS_LLVM_MCA_LSUNIT_H + +#include "HardwareUnits/HardwareUnit.h" +#include + +namespace mca { + +class InstRef; +struct InstrDesc; + +/// A Load/Store Unit implementing a load and store queues. +/// +/// This class implements a load queue and a store queue to emulate the +/// out-of-order execution of memory operations. +/// Each load (or store) consumes an entry in the load (or store) queue. +/// +/// Rules are: +/// 1) A younger load is allowed to pass an older load only if there are no +/// stores nor barriers in between the two loads. +/// 2) An younger store is not allowed to pass an older store. 
+/// 3) A younger store is not allowed to pass an older load. +/// 4) A younger load is allowed to pass an older store only if the load does +/// not alias with the store. +/// +/// This class optimistically assumes that loads don't alias store operations. +/// Under this assumption, younger loads are always allowed to pass older +/// stores (this would only affects rule 4). +/// Essentially, this LSUnit doesn't attempt to run any sort alias analysis to +/// predict when loads and stores don't alias with eachother. +/// +/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be +/// set to `false` by the constructor of LSUnit. +/// +/// In the case of write-combining memory, rule 2. could be relaxed to allow +/// reordering of non-aliasing store operations. At the moment, this is not +/// allowed. +/// To put it in another way, there is no option to specify a different memory +/// type for memory operations (example: write-through, write-combining, etc.). +/// Also, there is no way to weaken the memory model, and this unit currently +/// doesn't support write-combining behavior. +/// +/// No assumptions are made on the size of the store buffer. +/// As mentioned before, this class doesn't perform alias analysis. +/// Consequently, LSUnit doesn't know how to identify cases where +/// store-to-load forwarding may occur. +/// +/// LSUnit doesn't attempt to predict whether a load or store hits or misses +/// the L1 cache. To be more specific, LSUnit doesn't know anything about +/// the cache hierarchy and memory types. +/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the +/// scheduling model provides an "optimistic" load-to-use latency (which usually +/// matches the load-to-use latency for when there is a hit in the L1D). +/// +/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor +/// memory-barrier like instructions. 
+/// LSUnit conservatively assumes that an instruction which `mayLoad` and has +/// `unmodeled side effects` behave like a "soft" load-barrier. That means, it +/// serializes loads without forcing a flush of the load queue. +/// Similarly, instructions that both `mayStore` and have `unmodeled side +/// effects` are treated like store barriers. A full memory +/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side +/// effects. This is obviously inaccurate, but this is the best that we can do +/// at the moment. +/// +/// Each load/store barrier consumes one entry in the load/store queue. A +/// load/store barrier enforces ordering of loads/stores: +/// - A younger load cannot pass a load barrier. +/// - A younger store cannot pass a store barrier. +/// +/// A younger load has to wait for the memory load barrier to execute. +/// A load/store barrier is "executed" when it becomes the oldest entry in +/// the load/store queue(s). That also means, all the older loads/stores have +/// already been executed. +class LSUnit : public HardwareUnit { + // Load queue size. + // LQ_Size == 0 means that there are infinite slots in the load queue. + unsigned LQ_Size; + + // Store queue size. + // SQ_Size == 0 means that there are infinite slots in the store queue. + unsigned SQ_Size; + + // If true, loads will never alias with stores. This is the default. + bool NoAlias; + + std::set LoadQueue; + std::set StoreQueue; + + void assignLQSlot(unsigned Index); + void assignSQSlot(unsigned Index); + bool isReadyNoAlias(unsigned Index) const; + + // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is + // conservatively treated as a store barrier. It forces older store to be + // executed before newer stores are issued. + std::set StoreBarriers; + + // An instruction that both 'MayLoad' and 'HasUnmodeledSideEffects' is + // conservatively treated as a load barrier. It forces older loads to execute + // before newer loads are issued. 
+ std::set LoadBarriers; + + bool isSQEmpty() const { return StoreQueue.empty(); } + bool isLQEmpty() const { return LoadQueue.empty(); } + bool isSQFull() const { return SQ_Size != 0 && StoreQueue.size() == SQ_Size; } + bool isLQFull() const { return LQ_Size != 0 && LoadQueue.size() == LQ_Size; } + +public: + LSUnit(unsigned LQ = 0, unsigned SQ = 0, bool AssumeNoAlias = false) + : LQ_Size(LQ), SQ_Size(SQ), NoAlias(AssumeNoAlias) {} + +#ifndef NDEBUG + void dump() const; +#endif + + enum Status { + LSU_AVAILABLE = 0, + LSU_LQUEUE_FULL, + LSU_SQUEUE_FULL + }; + + // Returns LSU_AVAILABLE if there are enough load/store queue entries to serve + // IR. It also returns LSU_AVAILABLE if IR is not a memory operation. + Status isAvailable(const InstRef &IR) const; + + // Allocates load/store queue resources for IR. + // + // This method assumes that a previous call to `isAvailable(IR)` returned + // LSU_AVAILABLE, and that IR is a memory operation. + void dispatch(const InstRef &IR); + + // By default, rules are: + // 1. A store may not pass a previous store. + // 2. A load may not pass a previous store unless flag 'NoAlias' is set. + // 3. A load may pass a previous load. + // 4. A store may not pass a previous load (regardless of flag 'NoAlias'). + // 5. A load has to wait until an older load barrier is fully executed. + // 6. A store has to wait until an older store barrier is fully executed. 
+ virtual bool isReady(const InstRef &IR) const; + void onInstructionExecuted(const InstRef &IR); +}; + +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/RegisterFile.h @@ -0,0 +1,171 @@ +//===--------------------- RegisterFile.h -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a register mapping file class. This class is responsible +/// for managing hardware register files and the tracking of data dependencies +/// between registers. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H +#define LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H + +#include "HardwareUnits/HardwareUnit.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/Error.h" + +namespace mca { + +class ReadState; +class WriteState; +class WriteRef; + +/// Manages hardware register files, and tracks register definitions for +/// register renaming purposes. +class RegisterFile : public HardwareUnit { + const llvm::MCRegisterInfo &MRI; + + // Each register file is associated with an instance of + // RegisterMappingTracker. + // A RegisterMappingTracker keeps track of the number of physical registers + // which have been dynamically allocated by the simulator. + struct RegisterMappingTracker { + // The total number of physical registers that are available in this + // register file for register renaming purpouses. 
A value of zero for this + // field means: this register file has an unbounded number of physical + // registers. + const unsigned NumPhysRegs; + // Number of physical registers that are currently in use. + unsigned NumUsedPhysRegs; + + RegisterMappingTracker(unsigned NumPhysRegisters) + : NumPhysRegs(NumPhysRegisters), NumUsedPhysRegs(0) {} + }; + + // A vector of register file descriptors. This set always contains at least + // one entry. Entry at index #0 is reserved. That entry describes a register + // file with an unbounded number of physical registers that "sees" all the + // hardware registers declared by the target (i.e. all the register + // definitions in the target specific `XYZRegisterInfo.td` - where `XYZ` is + // the target name). + // + // Users can limit the number of physical registers that are available in + // regsiter file #0 specifying command line flag `-register-file-size=`. + llvm::SmallVector RegisterFiles; + + // This type is used to propagate information about the owner of a register, + // and the cost of allocating it in the PRF. Register cost is defined as the + // number of physical registers consumed by the PRF to allocate a user + // register. + // + // For example: on X86 BtVer2, a YMM register consumes 2 128-bit physical + // registers. So, the cost of allocating a YMM register in BtVer2 is 2. + using IndexPlusCostPairTy = std::pair; + + // Struct RegisterRenamingInfo maps registers to register files. + // There is a RegisterRenamingInfo object for every register defined by + // the target. RegisteRenamingInfo objects are stored into vector + // RegisterMappings, and register IDs can be used to reference them. + struct RegisterRenamingInfo { + IndexPlusCostPairTy IndexPlusCost; + llvm::MCPhysReg RenameAs; + }; + + // RegisterMapping objects are mainly used to track physical register + // definitions. There is a RegisterMapping for every register defined by the + // Target. 
For each register, a RegisterMapping pair contains a descriptor of + // the last register write (in the form of a WriteRef object), as well as a + // RegisterRenamingInfo to quickly identify owning register files. + // + // This implementation does not allow overlapping register files. The only + // register file that is allowed to overlap with other register files is + // register file #0. If we exclude register #0, every register is "owned" by + // at most one register file. + using RegisterMapping = std::pair; + + // This map contains one entry for each register defined by the target. + std::vector RegisterMappings; + + // This method creates a new register file descriptor. + // The new register file owns all of the registers declared by register + // classes in the 'RegisterClasses' set. + // + // Processor models allow the definition of RegisterFile(s) via tablegen. For + // example, this is a tablegen definition for a x86 register file for + // XMM[0-15] and YMM[0-15], that allows up to 60 renames (each rename costs 1 + // physical register). + // + // def FPRegisterFile : RegisterFile<60, [VR128RegClass, VR256RegClass]> + // + // Here FPRegisterFile contains all the registers defined by register class + // VR128RegClass and VR256RegClass. FPRegisterFile implements 60 + // registers which can be used for register renaming purpose. + void + addRegisterFile(llvm::ArrayRef RegisterClasses, + unsigned NumPhysRegs); + + // Consumes physical registers in each register file specified by the + // `IndexPlusCostPairTy`. This method is called from `addRegisterMapping()`. + void allocatePhysRegs(const RegisterRenamingInfo &Entry, + llvm::MutableArrayRef UsedPhysRegs); + + // Releases previously allocated physical registers from the register file(s). + // This method is called from `invalidateRegisterMapping()`. 
+ void freePhysRegs(const RegisterRenamingInfo &Entry, + llvm::MutableArrayRef FreedPhysRegs); + + // Create an instance of RegisterMappingTracker for every register file + // specified by the processor model. + // If no register file is specified, then this method creates a default + // register file with an unbounded number of physical registers. + void initialize(const llvm::MCSchedModel &SM, unsigned NumRegs); + +public: + RegisterFile(const llvm::MCSchedModel &SM, const llvm::MCRegisterInfo &mri, + unsigned NumRegs = 0); + + // This method updates the register mappings inserting a new register + // definition. This method is also responsible for updating the number of + // allocated physical registers in each register file modified by the write. + // No physical register is allocated when flag ShouldAllocatePhysRegs is false. + void addRegisterWrite(WriteRef Write, + llvm::MutableArrayRef UsedPhysRegs, + bool ShouldAllocatePhysRegs = true); + + // Removes write \param WS from the register mappings. + // Physical registers may be released to reflect this update. + void removeRegisterWrite(const WriteState &WS, + llvm::MutableArrayRef FreedPhysRegs, + bool ShouldFreePhysRegs = true); + + // Checks if there are enough physical registers in the register files. + // Returns a "response mask" where each bit represents the response from a + // different register file. A mask of all zeroes means that all register + // files are available. Otherwise, the mask can be used to identify which + // register file was busy. This semantic allows us to classify dispatch + // stalls caused by the lack of register file resources. + // + // Current implementation can simulate up to 32 register files (including the + // special register file at index #0). 
+ unsigned isAvailable(llvm::ArrayRef Regs) const; + void collectWrites(llvm::SmallVectorImpl &Writes, + unsigned RegID) const; + unsigned getNumRegisterFiles() const { return RegisterFiles.size(); } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_REGISTER_FILE_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/ResourceManager.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/ResourceManager.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/ResourceManager.h @@ -0,0 +1,360 @@ +//===--------------------- ResourceManager.h --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The classes here represent processor resource units and their management +/// strategy. These classes are managed by the Scheduler. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H +#define LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H + +#include "Instruction.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" + +namespace mca { + +/// Used to notify the internal state of a processor resource. +/// +/// A processor resource is available if it is not reserved, and there are +/// available slots in the buffer. A processor resource is unavailable if it +/// is either reserved, or the associated buffer is full. A processor resource +/// with a buffer size of -1 is always available if it is not reserved. 
+/// +/// Values of type ResourceStateEvent are returned by method +/// ResourceState::isBufferAvailable(), which is used to query the internal +/// state of a resource. +/// +/// The naming convention for resource state events is: +/// * Event names start with prefix RS_ +/// * Prefix RS_ is followed by a string describing the actual resource state. +enum ResourceStateEvent { + RS_BUFFER_AVAILABLE, + RS_BUFFER_UNAVAILABLE, + RS_RESERVED +}; + +/// Resource allocation strategy used by hardware scheduler resources. +class ResourceStrategy { + ResourceStrategy(const ResourceStrategy &) = delete; + ResourceStrategy &operator=(const ResourceStrategy &) = delete; + +public: + ResourceStrategy() {} + virtual ~ResourceStrategy(); + + /// Selects a processor resource unit from a ReadyMask. + virtual uint64_t select(uint64_t ReadyMask) = 0; + + /// Called by the ResourceManager when a processor resource group, or a + /// processor resource with multiple units has become unavailable. + /// + /// The default strategy uses this information to bias its selection logic. + virtual void used(uint64_t ResourceMask) {} +}; + +/// Default resource allocation strategy used by processor resource groups and +/// processor resources with multiple units. +class DefaultResourceStrategy final : public ResourceStrategy { + /// A Mask of resource unit identifiers. + /// + /// There is one bit set for every available resource unit. + /// It defaults to the value of field ResourceSizeMask in ResourceState. + const unsigned ResourceUnitMask; + + /// A simple round-robin selector for processor resource units. + /// Each bit of this mask identifies a sub resource within a group. 
+ /// +/// As an example, let's assume that this is a default policy for a +/// processor resource group composed by the following three units: +/// ResourceA -- 0b001 +/// ResourceB -- 0b010 +/// ResourceC -- 0b100 +/// +/// Field NextInSequenceMask is used to select the next unit from the set of +/// resource units. It defaults to the value of field `ResourceUnitMasks` (in +/// this example, it defaults to mask '0b111'). +/// +/// The round-robin selector would firstly select 'ResourceC', then +/// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the +/// corresponding bit in NextInSequenceMask is cleared. For example, if +/// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes +/// 0b011. +/// +/// When NextInSequenceMask becomes zero, it is automatically reset to the +/// default value (i.e. ResourceUnitMask). + uint64_t NextInSequenceMask; + + /// This field is used to track resource units that are used (i.e. selected) +/// by other groups other than the one associated with this strategy object. +/// +/// In LLVM processor resource groups are allowed to partially (or fully) +/// overlap. That means, a same unit may be visible to multiple groups. +/// This field keeps track of uses that have originated from outside of +/// this group. The idea is to bias the selection strategy, so that resources +/// that haven't been used by other groups get prioritized. +/// +/// The end goal is to (try to) keep the resource distribution as much uniform +/// as possible. By construction, this mask only tracks one-level of resource +/// usage. Therefore, this strategy is expected to be less accurate when same +/// units are used multiple times by other groups within a single round of +/// select. +/// +/// Note: an LRU selector would have a better accuracy at the cost of being +/// slightly more expensive (mostly in terms of runtime cost). 
Methods + /// 'select' and 'used', are always in the hot execution path of llvm-mca. + /// Therefore, a slow implementation of 'select' would have a negative impact + /// on the overall performance of the tool. + uint64_t RemovedFromNextInSequence; + + void skipMask(uint64_t Mask); + +public: + DefaultResourceStrategy(uint64_t UnitMask) + : ResourceStrategy(), ResourceUnitMask(UnitMask), + NextInSequenceMask(UnitMask), RemovedFromNextInSequence(0) {} + virtual ~DefaultResourceStrategy() = default; + + uint64_t select(uint64_t ReadyMask) override; + void used(uint64_t Mask) override; +}; + +/// A processor resource descriptor. +/// +/// There is an instance of this class for every processor resource defined by +/// the machine scheduling model. +/// Objects of class ResourceState dynamically track the usage of processor +/// resource units. +class ResourceState { + /// An index to the MCProcResourceDesc entry in the processor model. + const unsigned ProcResourceDescIndex; + /// A resource mask. This is generated by the tool with the help of + /// function `mca::createProcResourceMasks' (see Support.h). + const uint64_t ResourceMask; + + /// A ProcResource can have multiple units. + /// + /// For processor resource groups, + /// this field default to the value of field `ResourceMask`; the number of + /// bits set is equal to the cardinality of the group. For normal (i.e. + /// non-group) resources, the number of bits set in this mask is equivalent + /// to the number of units declared by the processor model (see field + /// 'NumUnits' in 'ProcResourceUnits'). + uint64_t ResourceSizeMask; + + /// A mask of ready units. + uint64_t ReadyMask; + + /// Buffered resources will have this field set to a positive number different + /// than zero. A buffered resource behaves like a reservation station + /// implementing its own buffer for out-of-order execution. + /// + /// A BufferSize of 1 is used by scheduler resources that force in-order + /// execution. 
+ /// +/// A BufferSize of 0 is used to model in-order issue/dispatch resources. +/// Since in-order issue/dispatch resources don't implement buffers, dispatch +/// events coincide with issue events. +/// Also, no other instruction can be dispatched/issued while this resource is +/// in use. Only when all the "resource cycles" are consumed (after the issue +/// event), a new instruction can be dispatched. + const int BufferSize; + + /// Available slots in the buffer (zero, if this is not a buffered resource). + unsigned AvailableSlots; + + /// This field is set if this resource is currently reserved. + /// + /// Resources can be reserved for a number of cycles. + /// Instructions can still be dispatched to reserved resources. However, + /// instructions dispatched to a reserved resource cannot be issued to the + /// underlying units (i.e. pipelines) until the resource is released. + bool Unavailable; + + /// Checks for the availability of unit 'SubResMask' in the group. + bool isSubResourceReady(uint64_t SubResMask) const { + return ReadyMask & SubResMask; + } + +public: + ResourceState(const llvm::MCProcResourceDesc &Desc, unsigned Index, + uint64_t Mask); + + unsigned getProcResourceID() const { return ProcResourceDescIndex; } + uint64_t getResourceMask() const { return ResourceMask; } + uint64_t getReadyMask() const { return ReadyMask; } + int getBufferSize() const { return BufferSize; } + + bool isBuffered() const { return BufferSize > 0; } + bool isInOrder() const { return BufferSize == 1; } + + /// Returns true if this is an in-order dispatch/issue resource. + bool isADispatchHazard() const { return BufferSize == 0; } + bool isReserved() const { return Unavailable; } + + void setReserved() { Unavailable = true; } + void clearReserved() { Unavailable = false; } + + /// Returns true if this resource is not reserved, and if there are at least + /// `NumUnits` available units. 
+ bool isReady(unsigned NumUnits = 1) const; + + bool isAResourceGroup() const { + return llvm::countPopulation(ResourceMask) > 1; + } + + bool containsResource(uint64_t ID) const { return ResourceMask & ID; } + + void markSubResourceAsUsed(uint64_t ID) { + assert(isSubResourceReady(ID)); + ReadyMask ^= ID; + } + + void releaseSubResource(uint64_t ID) { + assert(!isSubResourceReady(ID)); + ReadyMask ^= ID; + } + + unsigned getNumUnits() const { + return isAResourceGroup() ? 1U : llvm::countPopulation(ResourceSizeMask); + } + + /// Checks if there is an available slot in the resource buffer. + /// + /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if + /// there is a slot available. + /// + /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it + /// is reserved. + /// + /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. + ResourceStateEvent isBufferAvailable() const; + + /// Reserve a slot in the buffer. + void reserveBuffer() { + if (AvailableSlots) + AvailableSlots--; + } + + /// Release a slot in the buffer. + void releaseBuffer() { + if (BufferSize > 0) + AvailableSlots++; + assert(AvailableSlots <= static_cast(BufferSize)); + } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +/// A resource unit identifier. +/// +/// This is used to identify a specific processor resource unit using a pair +/// of indices where the 'first' index is a processor resource mask, and the +/// 'second' index is an index for a "sub-resource" (i.e. unit). +typedef std::pair ResourceRef; + +// First: a MCProcResourceDesc index identifying a buffered resource. +// Second: max number of buffer entries used in this resource. +typedef std::pair BufferUsageEntry; + +/// A resource manager for processor resource units and groups. +/// +/// This class owns all the ResourceState objects, and it is responsible for +/// acting on requests from a Scheduler by updating the internal state of +/// ResourceState objects. 
+/// This class doesn't know about instruction itineraries and functional units. +/// In future, it can be extended to support itineraries too through the same +/// public interface. +class ResourceManager { + // The resource manager owns all the ResourceState. + std::vector> Resources; + std::vector> Strategies; + + // Keeps track of which resources are busy, and how many cycles are left + // before those become usable again. + llvm::SmallDenseMap BusyResources; + + // A table to map processor resource IDs to processor resource masks. + llvm::SmallVector ProcResID2Mask; + + // Returns the actual resource unit that will be used. + ResourceRef selectPipe(uint64_t ResourceID); + + void use(const ResourceRef &RR); + void release(const ResourceRef &RR); + + unsigned getNumUnits(uint64_t ResourceID) const; + + // Overrides the selection strategy for the processor resource with the given + // mask. + void setCustomStrategyImpl(std::unique_ptr S, + uint64_t ResourceMask); + +public: + ResourceManager(const llvm::MCSchedModel &SM); + virtual ~ResourceManager() = default; + + // Overrides the selection strategy for the resource at index ResourceID in + // the MCProcResourceDesc table. + void setCustomStrategy(std::unique_ptr S, + unsigned ResourceID) { + assert(ResourceID < ProcResID2Mask.size() && + "Invalid resource index in input!"); + return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]); + } + + // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if + // there are enough available slots in the buffers. + ResourceStateEvent canBeDispatched(llvm::ArrayRef Buffers) const; + + // Return the processor resource identifier associated to this Mask. + unsigned resolveResourceMask(uint64_t Mask) const; + + // Consume a slot in every buffered resource from array 'Buffers'. Resource + // units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. 
+ void reserveBuffers(llvm::ArrayRef Buffers); + + // Release buffer entries previously allocated by method reserveBuffers. + void releaseBuffers(llvm::ArrayRef Buffers); + + // Reserve a processor resource. A reserved resource is not available for + // instruction issue until it is released. + void reserveResource(uint64_t ResourceID); + + // Release a previously reserved processor resource. + void releaseResource(uint64_t ResourceID); + + // Returns true if all resources are in-order, and there is at least one + // resource which is a dispatch hazard (BufferSize = 0). + bool mustIssueImmediately(const InstrDesc &Desc) const; + + bool canBeIssued(const InstrDesc &Desc) const; + + void issueInstruction( + const InstrDesc &Desc, + llvm::SmallVectorImpl> &Pipes); + + void cycleEvent(llvm::SmallVectorImpl &ResourcesFreed); + +#ifndef NDEBUG + void dump() const { + for (const std::unique_ptr &Resource : Resources) + Resource->dump(); + } +#endif +}; +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_RESOURCE_MANAGER_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/RetireControlUnit.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/RetireControlUnit.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/RetireControlUnit.h @@ -0,0 +1,97 @@ +//===---------------------- RetireControlUnit.h -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file simulates the hardware responsible for retiring instructions. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H +#define LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H + +#include "HardwareUnits/HardwareUnit.h" +#include "Instruction.h" +#include "llvm/MC/MCSchedule.h" +#include + +namespace mca { + +/// This class tracks which instructions are in-flight (i.e., dispatched but not +/// retired) in the OoO backend. +// +/// This class checks on every cycle if/which instructions can be retired. +/// Instructions are retired in program order. +/// In the event of an instruction being retired, the pipeline that owns +/// this RetireControlUnit (RCU) gets notified. +/// +/// On instruction retired, register updates are all architecturally +/// committed, and any physicall registers previously allocated for the +/// retired instruction are freed. +struct RetireControlUnit : public HardwareUnit { + // A RUToken is created by the RCU for every instruction dispatched to the + // schedulers. These "tokens" are managed by the RCU in its token Queue. + // + // On every cycle ('cycleEvent'), the RCU iterates through the token queue + // looking for any token with its 'Executed' flag set. If a token has that + // flag set, then the instruction has reached the write-back stage and will + // be retired by the RCU. + // + // 'NumSlots' represents the number of entries consumed by the instruction in + // the reorder buffer. Those entries will become available again once the + // instruction is retired. + // + // Note that the size of the reorder buffer is defined by the scheduling + // model via field 'NumMicroOpBufferSize'. + struct RUToken { + InstRef IR; + unsigned NumSlots; // Slots reserved to this instruction. + bool Executed; // True if the instruction is past the WB stage. + }; + +private: + unsigned NextAvailableSlotIdx; + unsigned CurrentInstructionSlotIdx; + unsigned AvailableSlots; + unsigned MaxRetirePerCycle; // 0 means no limit. 
+ std::vector Queue; + +public: + RetireControlUnit(const llvm::MCSchedModel &SM); + + bool isEmpty() const { return AvailableSlots == Queue.size(); } + bool isAvailable(unsigned Quantity = 1) const { + // Some instructions may declare a number of uOps which exceeds the size + // of the reorder buffer. To avoid problems, cap the amount of slots to + // the size of the reorder buffer. + Quantity = std::min(Quantity, static_cast(Queue.size())); + return AvailableSlots >= Quantity; + } + + unsigned getMaxRetirePerCycle() const { return MaxRetirePerCycle; } + + // Reserves a number of slots, and returns a new token. + unsigned reserveSlot(const InstRef &IS, unsigned NumMicroOps); + + // Return the current token from the RCU's circular token queue. + const RUToken &peekCurrentToken() const; + + // Advance the pointer to the next token in the circular token queue. + void consumeCurrentToken(); + + // Update the RCU token to represent the executed state. + void onInstructionExecuted(unsigned TokenID); + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_CONTROL_UNIT_H Index: llvm/trunk/tools/llvm-mca/include/HardwareUnits/Scheduler.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/HardwareUnits/Scheduler.h +++ llvm/trunk/tools/llvm-mca/include/HardwareUnits/Scheduler.h @@ -0,0 +1,212 @@ +//===--------------------- Scheduler.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A scheduler for Processor Resource Units and Processor Resource Groups. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULER_H +#define LLVM_TOOLS_LLVM_MCA_SCHEDULER_H + +#include "HardwareUnits/HardwareUnit.h" +#include "HardwareUnits/LSUnit.h" +#include "ResourceManager.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" + +namespace mca { + +class SchedulerStrategy { +public: + SchedulerStrategy() = default; + virtual ~SchedulerStrategy(); + + /// Returns true if Lhs should take priority over Rhs. + /// + /// This method is used by class Scheduler to select the "best" ready + /// instruction to issue to the underlying pipelines. + virtual bool compare(const InstRef &Lhs, const InstRef &Rhs) const = 0; +}; + +/// Default instruction selection strategy used by class Scheduler. +class DefaultSchedulerStrategy : public SchedulerStrategy { + /// This method ranks instructions based on their age, and the number of known + /// users. The lower the rank value, the better. + int computeRank(const InstRef &Lhs) const { + return Lhs.getSourceIndex() - Lhs.getInstruction()->getNumUsers(); + } + +public: + DefaultSchedulerStrategy() = default; + virtual ~DefaultSchedulerStrategy(); + + bool compare(const InstRef &Lhs, const InstRef &Rhs) const override { + int LhsRank = computeRank(Lhs); + int RhsRank = computeRank(Rhs); + + /// Prioritize older instructions over younger instructions to minimize the + /// pressure on the reorder buffer. + if (LhsRank == RhsRank) + return Lhs.getSourceIndex() < Rhs.getSourceIndex(); + return LhsRank < RhsRank; + } +}; + +/// Class Scheduler is responsible for issuing instructions to pipeline +/// resources. +/// +/// Internally, it delegates to a ResourceManager the management of processor +/// resources. This class is also responsible for tracking the progress of +/// instructions from the dispatch stage, until the write-back stage. 
+/// +/// An instruction dispatched to the Scheduler is initially placed into either +/// the 'WaitSet' or the 'ReadySet' depending on the availability of the input +/// operands. +/// +/// An instruction is moved from the WaitSet to the ReadySet when register +/// operands become available, and all memory dependencies are met. +/// Instructions that are moved from the WaitSet to the ReadySet transition +/// in state from 'IS_AVAILABLE' to 'IS_READY'. +/// +/// On every cycle, the Scheduler checks if it can promote instructions from the +/// WaitSet to the ReadySet. +/// +/// An Instruction is moved from the ReadySet the `IssuedSet` when it is issued +/// to a (one or more) pipeline(s). This event also causes an instruction state +/// transition (i.e. from state IS_READY, to state IS_EXECUTING). An Instruction +/// leaves the IssuedSet when it reaches the write-back stage. +class Scheduler : public HardwareUnit { + LSUnit *LSU; + + // Instruction selection strategy for this Scheduler. + std::unique_ptr Strategy; + + // Hardware resources that are managed by this scheduler. + std::unique_ptr Resources; + + std::vector WaitSet; + std::vector ReadySet; + std::vector IssuedSet; + + /// Verify the given selection strategy and set the Strategy member + /// accordingly. If no strategy is provided, the DefaultSchedulerStrategy is + /// used. + void initializeStrategy(std::unique_ptr S); + + /// Issue an instruction without updating the ready queue. + void issueInstructionImpl( + InstRef &IR, + llvm::SmallVectorImpl> &Pipes); + + // Identify instructions that have finished executing, and remove them from + // the IssuedSet. References to executed instructions are added to input + // vector 'Executed'. + void updateIssuedSet(llvm::SmallVectorImpl &Executed); + + // Try to promote instructions from WaitSet to ReadySet. + // Add promoted instructions to the 'Ready' vector in input. 
+ void promoteToReadySet(llvm::SmallVectorImpl &Ready); + +public: + Scheduler(const llvm::MCSchedModel &Model, LSUnit *Lsu) + : LSU(Lsu), Resources(llvm::make_unique(Model)) { + initializeStrategy(nullptr); + } + Scheduler(const llvm::MCSchedModel &Model, LSUnit *Lsu, + std::unique_ptr SelectStrategy) + : LSU(Lsu), Resources(llvm::make_unique(Model)) { + initializeStrategy(std::move(SelectStrategy)); + } + Scheduler(std::unique_ptr RM, LSUnit *Lsu, + std::unique_ptr SelectStrategy) + : LSU(Lsu), Resources(std::move(RM)) { + initializeStrategy(std::move(SelectStrategy)); + } + + // Stalls generated by the scheduler. + enum Status { + SC_AVAILABLE, + SC_LOAD_QUEUE_FULL, + SC_STORE_QUEUE_FULL, + SC_BUFFERS_FULL, + SC_DISPATCH_GROUP_STALL, + }; + + /// Check if the instruction in 'IR' can be dispatched and returns an answer + /// in the form of a Status value. + /// + /// The DispatchStage is responsible for querying the Scheduler before + /// dispatching new instructions. This routine is used for performing such + /// a query. If the instruction 'IR' can be dispatched, then true is + /// returned, otherwise false is returned with Event set to the stall type. + /// Internally, it also checks if the load/store unit is available. + Status isAvailable(const InstRef &IR) const; + + /// Reserves buffer and LSUnit queue resources that are necessary to issue + /// this instruction. + /// + /// Returns true if instruction IR is ready to be issued to the underlying + /// pipelines. Note that this operation cannot fail; it assumes that a + /// previous call to method `isAvailable(IR)` returned `SC_AVAILABLE`. + void dispatch(const InstRef &IR); + + /// Returns true if IR is ready to be executed by the underlying pipelines. + /// This method assumes that IR has been previously dispatched. 
+ bool isReady(const InstRef &IR) const; + + /// Issue an instruction and populates a vector of used pipeline resources, + /// and a vector of instructions that transitioned to the ready state as a + /// result of this event. + void + issueInstruction(InstRef &IR, + llvm::SmallVectorImpl> &Used, + llvm::SmallVectorImpl &Ready); + + /// Returns true if IR has to be issued immediately, or if IR is a zero + /// latency instruction. + bool mustIssueImmediately(const InstRef &IR) const; + + /// This routine notifies the Scheduler that a new cycle just started. + /// + /// It notifies the underlying ResourceManager that a new cycle just started. + /// Vector `Freed` is populated with resourceRef related to resources that + /// have changed in state, and that are now available to new instructions. + /// Instructions executed are added to vector Executed, while vector Ready is + /// populated with instructions that have become ready in this new cycle. + void cycleEvent(llvm::SmallVectorImpl &Freed, + llvm::SmallVectorImpl &Ready, + llvm::SmallVectorImpl &Executed); + + /// Convert a resource mask into a valid llvm processor resource identifier. + unsigned getResourceID(uint64_t Mask) const { + return Resources->resolveResourceMask(Mask); + } + + /// Select the next instruction to issue from the ReadySet. Returns an invalid + /// instruction reference if there are no ready instructions, or if processor + /// resources are not available. + InstRef select(); + +#ifndef NDEBUG + // Update the ready queues. + void dump() const; + + // This routine performs a sanity check. This routine should only be called + // when we know that 'IR' is not in the scheduler's instruction queues. 
+ void sanityCheck(const InstRef &IR) const { + assert(llvm::find(WaitSet, IR) == WaitSet.end()); + assert(llvm::find(ReadySet, IR) == ReadySet.end()); + assert(llvm::find(IssuedSet, IR) == IssuedSet.end()); + } +#endif // !NDEBUG +}; +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_SCHEDULER_H Index: llvm/trunk/tools/llvm-mca/include/InstrBuilder.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/InstrBuilder.h +++ llvm/trunk/tools/llvm-mca/include/InstrBuilder.h @@ -0,0 +1,90 @@ +//===--------------------- InstrBuilder.h -----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A builder class for instructions that are statically analyzed by llvm-mca. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H +#define LLVM_TOOLS_LLVM_MCA_INSTRBUILDER_H + +#include "Instruction.h" +#include "Support.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Error.h" + +namespace mca { + +class DispatchUnit; + +/// A builder class that knows how to construct Instruction objects. +/// +/// Every llvm-mca Instruction is described by an object of class InstrDesc. +/// An InstrDesc describes which registers are read/written by the instruction, +/// as well as the instruction latency and hardware resources consumed. +/// +/// This class is used by the tool to construct Instructions and instruction +/// descriptors (i.e. InstrDesc objects). 
+/// Information from the machine scheduling model is used to identify processor +/// resources that are consumed by an instruction. +class InstrBuilder { + const llvm::MCSubtargetInfo &STI; + const llvm::MCInstrInfo &MCII; + const llvm::MCRegisterInfo &MRI; + const llvm::MCInstrAnalysis &MCIA; + llvm::MCInstPrinter &MCIP; + llvm::SmallVector ProcResourceMasks; + + llvm::DenseMap> Descriptors; + llvm::DenseMap> + VariantDescriptors; + + llvm::Expected + createInstrDescImpl(const llvm::MCInst &MCI); + llvm::Expected + getOrCreateInstrDesc(const llvm::MCInst &MCI); + + InstrBuilder(const InstrBuilder &) = delete; + InstrBuilder &operator=(const InstrBuilder &) = delete; + + llvm::Error populateWrites(InstrDesc &ID, const llvm::MCInst &MCI, + unsigned SchedClassID); + llvm::Error populateReads(InstrDesc &ID, const llvm::MCInst &MCI, + unsigned SchedClassID); + +public: + InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii, + const llvm::MCRegisterInfo &mri, + const llvm::MCInstrAnalysis &mcia, llvm::MCInstPrinter &mcip) + : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), MCIP(mcip), + ProcResourceMasks(STI.getSchedModel().getNumProcResourceKinds()) { + computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); + } + + // Returns an array of processor resource masks. + // Masks are computed by function mca::computeProcResourceMasks. see + // Support.h for a description of how masks are computed and how masks can be + // used to solve set membership problems. 
+ llvm::ArrayRef getProcResourceMasks() const { + return ProcResourceMasks; + } + + void clear() { VariantDescriptors.shrink_and_clear(); } + + llvm::Expected> + createInstruction(const llvm::MCInst &MCI); +}; +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/Instruction.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Instruction.h +++ llvm/trunk/tools/llvm-mca/include/Instruction.h @@ -0,0 +1,449 @@ +//===--------------------- Instruction.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines abstractions used by the Pipeline to model register reads, +/// register writes and instructions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H +#define LLVM_TOOLS_LLVM_MCA_INSTRUCTION_H + +#include "llvm/Support/MathExtras.h" + +#ifndef NDEBUG +#include "llvm/Support/raw_ostream.h" +#endif + +#include +#include +#include + +namespace mca { + +constexpr int UNKNOWN_CYCLES = -512; + +/// A register write descriptor. +struct WriteDescriptor { + // Operand index. The index is negative for implicit writes only. + // For implicit writes, the actual operand index is computed performing + // a bitwise not of the OpIndex. + int OpIndex; + // Write latency. Number of cycles before write-back stage. + unsigned Latency; + // This field is set to a value different than zero only if this + // is an implicit definition. + unsigned RegisterID; + // Instruction itineraries would set this field to the SchedClass ID. + // Otherwise, it defaults to the WriteResourceID from the MCWriteLatencyEntry + // element associated to this write. 
+ // When computing read latencies, this value is matched against the + // "ReadAdvance" information. The hardware backend may implement + // dedicated forwarding paths to quickly propagate write results to dependent + // instructions waiting in the reservation station (effectively bypassing the + // write-back stage). + unsigned SClassOrWriteResourceID; + // True only if this is a write obtained from an optional definition. + // Optional definitions are allowed to reference regID zero (i.e. "no + // register"). + bool IsOptionalDef; + + bool isImplicitWrite() const { return OpIndex < 0; }; +}; + +/// A register read descriptor. +struct ReadDescriptor { + // A MCOperand index. This is used by the Dispatch logic to identify register + // reads. Implicit reads have negative indices. The actual operand index of an + // implicit read is the bitwise not of field OpIndex. + int OpIndex; + // The actual "UseIdx". This is used to query the ReadAdvance table. Explicit + // uses always come first in the sequence of uses. + unsigned UseIndex; + // This field is only set if this is an implicit read. + unsigned RegisterID; + // Scheduling Class Index. It is used to query the scheduling model for the + // MCSchedClassDesc object. + unsigned SchedClassID; + + bool isImplicitRead() const { return OpIndex < 0; }; +}; + +class ReadState; + +/// Tracks uses of a register definition (e.g. register write). +/// +/// Each implicit/explicit register write is associated with an instance of +/// this class. A WriteState object tracks the dependent users of a +/// register write. It also tracks how many cycles are left before the write +/// back stage. +class WriteState { + const WriteDescriptor &WD; + // On instruction issue, this field is set equal to the write latency. + // Before instruction issue, this field defaults to -512, a special + // value that represents an "unknown" number of cycles. + int CyclesLeft; + + // Actual register defined by this write. 
This field is only used + // to speedup queries on the register file. + // For implicit writes, this field always matches the value of + // field RegisterID from WD. + unsigned RegisterID; + + // True if this write implicitly clears the upper portion of RegisterID's + // super-registers. + bool ClearsSuperRegs; + + // This field is set if this is a partial register write, and it has a false + // dependency on any previous write of the same register (or a portion of it). + // DependentWrite must be able to complete before this write completes, so + // that we don't break the WAW, and the two writes can be merged together. + const WriteState *DependentWrite; + + // Number of writes that are in a WAW dependency with this write. + unsigned NumWriteUsers; + + // A list of dependent reads. Users is a set of dependent + // reads. A dependent read is added to the set only if CyclesLeft + // is "unknown". As soon as CyclesLeft is 'known', each user in the set + // gets notified with the actual CyclesLeft. + + // The 'second' element of a pair is a "ReadAdvance" number of cycles. 
+ std::set> Users; + +public: + WriteState(const WriteDescriptor &Desc, unsigned RegID, + bool clearsSuperRegs = false) + : WD(Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID), + ClearsSuperRegs(clearsSuperRegs), DependentWrite(nullptr), + NumWriteUsers(0U) {} + WriteState(const WriteState &Other) = delete; + WriteState &operator=(const WriteState &Other) = delete; + + int getCyclesLeft() const { return CyclesLeft; } + unsigned getWriteResourceID() const { return WD.SClassOrWriteResourceID; } + unsigned getRegisterID() const { return RegisterID; } + unsigned getLatency() const { return WD.Latency; } + + void addUser(ReadState *Use, int ReadAdvance); + + unsigned getNumUsers() const { return Users.size() + NumWriteUsers; } + bool clearsSuperRegisters() const { return ClearsSuperRegs; } + + const WriteState *getDependentWrite() const { return DependentWrite; } + void setDependentWrite(WriteState *Other) { + DependentWrite = Other; + ++Other->NumWriteUsers; + } + + // On every cycle, update CyclesLeft and notify dependent users. + void cycleEvent(); + void onInstructionIssued(); + +#ifndef NDEBUG + void dump() const; +#endif +}; + +/// Tracks register operand latency in cycles. +/// +/// A read may be dependent on more than one write. This occurs when some +/// writes only partially update the register associated to this read. +class ReadState { + const ReadDescriptor &RD; + // Physical register identified associated to this read. + unsigned RegisterID; + // Number of writes that contribute to the definition of RegisterID. + // In the absence of partial register updates, the number of DependentWrites + // cannot be more than one. + unsigned DependentWrites; + // Number of cycles left before RegisterID can be read. This value depends on + // the latency of all the dependent writes. It defaults to UNKNOWN_CYCLES. + // It gets set to the value of field TotalCycles only when the 'CyclesLeft' of + // every dependent write is known. 
+ int CyclesLeft; + // This field is updated on every writeStartEvent(). When the number of + // dependent writes (i.e. field DependentWrite) is zero, this value is + // propagated to field CyclesLeft. + unsigned TotalCycles; + // This field is set to true only if there are no dependent writes, and + // there are no `CyclesLeft' to wait. + bool IsReady; + +public: + ReadState(const ReadDescriptor &Desc, unsigned RegID) + : RD(Desc), RegisterID(RegID), DependentWrites(0), + CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true) {} + ReadState(const ReadState &Other) = delete; + ReadState &operator=(const ReadState &Other) = delete; + + const ReadDescriptor &getDescriptor() const { return RD; } + unsigned getSchedClass() const { return RD.SchedClassID; } + unsigned getRegisterID() const { return RegisterID; } + + bool isReady() const { return IsReady; } + bool isImplicitRead() const { return RD.isImplicitRead(); } + + void cycleEvent(); + void writeStartEvent(unsigned Cycles); + void setDependentWrites(unsigned Writes) { + DependentWrites = Writes; + IsReady = !Writes; + } +}; + +/// A sequence of cycles. +/// +/// This class can be used as a building block to construct ranges of cycles. +class CycleSegment { + unsigned Begin; // Inclusive. + unsigned End; // Exclusive. + bool Reserved; // Resources associated to this segment must be reserved. 
+ +public: + CycleSegment(unsigned StartCycle, unsigned EndCycle, bool IsReserved = false) + : Begin(StartCycle), End(EndCycle), Reserved(IsReserved) {} + + bool contains(unsigned Cycle) const { return Cycle >= Begin && Cycle < End; } + bool startsAfter(const CycleSegment &CS) const { return End <= CS.Begin; } + bool endsBefore(const CycleSegment &CS) const { return Begin >= CS.End; } + bool overlaps(const CycleSegment &CS) const { + return !startsAfter(CS) && !endsBefore(CS); + } + bool isExecuting() const { return Begin == 0 && End != 0; } + bool isExecuted() const { return End == 0; } + bool operator<(const CycleSegment &Other) const { + return Begin < Other.Begin; + } + CycleSegment &operator--(void) { + if (Begin) + Begin--; + if (End) + End--; + return *this; + } + + bool isValid() const { return Begin <= End; } + unsigned size() const { return End - Begin; }; + void Subtract(unsigned Cycles) { + assert(End >= Cycles); + End -= Cycles; + } + + unsigned begin() const { return Begin; } + unsigned end() const { return End; } + void setEnd(unsigned NewEnd) { End = NewEnd; } + bool isReserved() const { return Reserved; } + void setReserved() { Reserved = true; } +}; + +/// Helper used by class InstrDesc to describe how hardware resources +/// are used. +/// +/// This class describes how many resource units of a specific resource kind +/// (and how many cycles) are "used" by an instruction. +struct ResourceUsage { + CycleSegment CS; + unsigned NumUnits; + ResourceUsage(CycleSegment Cycles, unsigned Units = 1) + : CS(Cycles), NumUnits(Units) {} + unsigned size() const { return CS.size(); } + bool isReserved() const { return CS.isReserved(); } + void setReserved() { CS.setReserved(); } +}; + +/// An instruction descriptor +struct InstrDesc { + std::vector Writes; // Implicit writes are at the end. + std::vector Reads; // Implicit reads are at the end. 
+ + // For every resource used by an instruction of this kind, this vector + // reports the number of "consumed cycles". + std::vector> Resources; + + // A list of buffered resources consumed by this instruction. + std::vector Buffers; + unsigned MaxLatency; + // Number of MicroOps for this instruction. + unsigned NumMicroOps; + + bool MayLoad; + bool MayStore; + bool HasSideEffects; + + // A zero latency instruction doesn't consume any scheduler resources. + bool isZeroLatency() const { return !MaxLatency && Resources.empty(); } +}; + +/// An instruction propagated through the simulated instruction pipeline. +/// +/// This class is used to monitor changes to the internal state of instructions +/// that are sent to the various components of the simulated hardware pipeline. +class Instruction { + const InstrDesc &Desc; + + enum InstrStage { + IS_INVALID, // Instruction in an invalid state. + IS_AVAILABLE, // Instruction dispatched but operands are not ready. + IS_READY, // Instruction dispatched and operands ready. + IS_EXECUTING, // Instruction issued. + IS_EXECUTED, // Instruction executed. Values are written back. + IS_RETIRED // Instruction retired. + }; + + // The current instruction stage. + enum InstrStage Stage; + + // This value defaults to the instruction latency. This instruction is + // considered executed when field CyclesLeft goes to zero. + int CyclesLeft; + + // Retire Unit token ID for this instruction. + unsigned RCUTokenID; + + bool IsDepBreaking; + + using UniqueDef = std::unique_ptr; + using UniqueUse = std::unique_ptr; + using VecDefs = std::vector; + using VecUses = std::vector; + + // Output dependencies. + // One entry per each implicit and explicit register definition. + VecDefs Defs; + + // Input dependencies. + // One entry per each implicit and explicit register use. 
+ VecUses Uses; + +public: + Instruction(const InstrDesc &D) + : Desc(D), Stage(IS_INVALID), CyclesLeft(UNKNOWN_CYCLES), RCUTokenID(0), + IsDepBreaking(false) {} + Instruction(const Instruction &Other) = delete; + Instruction &operator=(const Instruction &Other) = delete; + + VecDefs &getDefs() { return Defs; } + const VecDefs &getDefs() const { return Defs; } + VecUses &getUses() { return Uses; } + const VecUses &getUses() const { return Uses; } + const InstrDesc &getDesc() const { return Desc; } + unsigned getRCUTokenID() const { return RCUTokenID; } + int getCyclesLeft() const { return CyclesLeft; } + + bool hasDependentUsers() const { + return std::any_of(Defs.begin(), Defs.end(), [](const UniqueDef &Def) { + return Def->getNumUsers() > 0; + }); + } + + bool isDependencyBreaking() const { return IsDepBreaking; } + void setDependencyBreaking() { IsDepBreaking = true; } + + unsigned getNumUsers() const { + unsigned NumUsers = 0; + for (const UniqueDef &Def : Defs) + NumUsers += Def->getNumUsers(); + return NumUsers; + } + + // Transition to the dispatch stage, and assign a RCUToken to this + // instruction. The RCUToken is used to track the completion of every + // register write performed by this instruction. + void dispatch(unsigned RCUTokenID); + + // Instruction issued. Transition to the IS_EXECUTING state, and update + // all the definitions. + void execute(); + + // Force a transition from the IS_AVAILABLE state to the IS_READY state if + // input operands are all ready. State transitions normally occur at the + // beginning of a new cycle (see method cycleEvent()). However, the scheduler + // may decide to promote instructions from the wait queue to the ready queue + // as the result of another issue event. This method is called every time the + // instruction might have changed in state. 
+ void update(); + + bool isDispatched() const { return Stage == IS_AVAILABLE; } + bool isReady() const { return Stage == IS_READY; } + bool isExecuting() const { return Stage == IS_EXECUTING; } + bool isExecuted() const { return Stage == IS_EXECUTED; } + bool isRetired() const { return Stage == IS_RETIRED; } + + void retire() { + assert(isExecuted() && "Instruction is in an invalid state!"); + Stage = IS_RETIRED; + } + + void cycleEvent(); +}; + +/// An InstRef contains both a SourceMgr index and Instruction pair. The index +/// is used as a unique identifier for the instruction. MCA will make use of +/// this index as a key throughout MCA. +class InstRef : public std::pair { +public: + InstRef() : std::pair(0, nullptr) {} + InstRef(unsigned Index, Instruction *I) + : std::pair(Index, I) {} + + unsigned getSourceIndex() const { return first; } + Instruction *getInstruction() { return second; } + const Instruction *getInstruction() const { return second; } + + /// Returns true if this references a valid instruction. + bool isValid() const { return second != nullptr; } + + /// Invalidate this reference. + void invalidate() { second = nullptr; } + +#ifndef NDEBUG + void print(llvm::raw_ostream &OS) const { OS << getSourceIndex(); } +#endif +}; + +#ifndef NDEBUG +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const InstRef &IR) { + IR.print(OS); + return OS; +} +#endif + +/// A reference to a register write. +/// +/// This class is mainly used by the register file to describe register +/// mappings. It correlates a register write to the source index of the +/// defining instruction. 
+class WriteRef { + std::pair Data; + static const unsigned INVALID_IID; + +public: + WriteRef() : Data(INVALID_IID, nullptr) {} + WriteRef(unsigned SourceIndex, WriteState *WS) : Data(SourceIndex, WS) {} + + unsigned getSourceIndex() const { return Data.first; } + const WriteState *getWriteState() const { return Data.second; } + WriteState *getWriteState() { return Data.second; } + void invalidate() { Data = std::make_pair(INVALID_IID, nullptr); } + + bool isValid() const { + return Data.first != INVALID_IID && Data.second != nullptr; + } + bool operator==(const WriteRef &Other) const { return Data == Other.Data; } + +#ifndef NDEBUG + void dump() const; +#endif +}; + +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/Pipeline.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Pipeline.h +++ llvm/trunk/tools/llvm-mca/include/Pipeline.h @@ -0,0 +1,76 @@ +//===--------------------- Pipeline.h ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements an ordered container of stages that simulate the +/// pipeline of a hardware backend. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINE_H +#define LLVM_TOOLS_LLVM_MCA_PIPELINE_H + +#include "HardwareUnits/Scheduler.h" +#include "Stages/Stage.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Error.h" + +namespace mca { + +class HWEventListener; +class HWInstructionEvent; +class HWStallEvent; + +/// A pipeline for a specific subtarget. +/// +/// It emulates an out-of-order execution of instructions. Instructions are +/// fetched from a MCInst sequence managed by an initial 'Fetch' stage. 
+/// Instructions are firstly fetched, then dispatched to the schedulers, and +/// then executed. +/// +/// This class tracks the lifetime of an instruction from the moment where +/// it gets dispatched to the schedulers, to the moment where it finishes +/// executing and register writes are architecturally committed. +/// In particular, it monitors changes in the state of every instruction +/// in flight. +/// +/// Instructions are executed in a loop of iterations. The number of iterations +/// is defined by the SourceMgr object, which is managed by the initial stage +/// of the instruction pipeline. +/// +/// The Pipeline entry point is method 'run()' which executes cycles in a loop +/// until there are new instructions to dispatch, and not every instruction +/// has been retired. +/// +/// Internally, the Pipeline collects statistical information in the form of +/// histograms. For example, it tracks how the dispatch group size changes +/// over time. +class Pipeline { + Pipeline(const Pipeline &P) = delete; + Pipeline &operator=(const Pipeline &P) = delete; + + /// An ordered list of stages that define this instruction pipeline. 
+ llvm::SmallVector, 8> Stages; + std::set Listeners; + unsigned Cycles; + + llvm::Error runCycle(); + bool hasWorkToProcess(); + void notifyCycleBegin(); + void notifyCycleEnd(); + +public: + Pipeline() : Cycles(0) {} + void appendStage(std::unique_ptr S); + llvm::Error run(); + void addEventListener(HWEventListener *Listener); +}; +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_PIPELINE_H Index: llvm/trunk/tools/llvm-mca/include/SourceMgr.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/SourceMgr.h +++ llvm/trunk/tools/llvm-mca/include/SourceMgr.h @@ -0,0 +1,64 @@ +//===--------------------- SourceMgr.h --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements class SourceMgr. Class SourceMgr abstracts the input +/// code sequence (a sequence of MCInst), and assings unique identifiers to +/// every instruction in the sequence. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H +#define LLVM_TOOLS_LLVM_MCA_SOURCEMGR_H + +#include "llvm/MC/MCInst.h" +#include + +namespace mca { + +typedef std::pair SourceRef; + +class SourceMgr { + using InstVec = std::vector>; + const InstVec &Sequence; + unsigned Current; + unsigned Iterations; + static const unsigned DefaultIterations = 100; + +public: + SourceMgr(const InstVec &MCInstSequence, unsigned NumIterations) + : Sequence(MCInstSequence), Current(0), + Iterations(NumIterations ? 
NumIterations : DefaultIterations) {} + + unsigned getCurrentIteration() const { return Current / Sequence.size(); } + unsigned getNumIterations() const { return Iterations; } + unsigned size() const { return Sequence.size(); } + const InstVec &getSequence() const { return Sequence; } + + bool hasNext() const { return Current < (Iterations * size()); } + void updateNext() { Current++; } + + const SourceRef peekNext() const { + assert(hasNext() && "Already at end of sequence!"); + unsigned Index = getCurrentInstructionIndex(); + return SourceRef(Current, Sequence[Index].get()); + } + + unsigned getCurrentInstructionIndex() const { + return Current % Sequence.size(); + } + + const llvm::MCInst &getMCInstFromIndex(unsigned Index) const { + return *Sequence[Index % size()]; + } + + bool isEmpty() const { return size() == 0; } +}; +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/DispatchStage.h @@ -0,0 +1,95 @@ +//===----------------------- DispatchStage.h --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file models the dispatch component of an instruction pipeline. +/// +/// The DispatchStage is responsible for updating instruction dependencies +/// and communicating to the simulated instruction scheduler that an instruction +/// is ready to be scheduled for execution. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H +#define LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H + +#include "HWEventListener.h" +#include "HardwareUnits/RegisterFile.h" +#include "HardwareUnits/RetireControlUnit.h" +#include "Instruction.h" +#include "Stages/Stage.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace mca { + +// Implements the hardware dispatch logic. +// +// This class is responsible for the dispatch stage, in which instructions are +// dispatched in groups to the Scheduler. An instruction can be dispatched if +// the following conditions are met: +// 1) There are enough entries in the reorder buffer (see class +// RetireControlUnit) to write the opcodes associated with the instruction. +// 2) There are enough physical registers to rename output register operands. +// 3) There are enough entries available in the used buffered resource(s). +// +// The number of micro opcodes that can be dispatched in one cycle is limited by +// the value of field 'DispatchWidth'. A "dynamic dispatch stall" occurs when +// processor resources are not available. Dispatch stall events are counted +// during the entire execution of the code, and displayed by the performance +// report when flag '-dispatch-stats' is specified. +// +// If the number of micro opcodes exceedes DispatchWidth, then the instruction +// is dispatched in multiple cycles. 
+class DispatchStage final : public Stage { + unsigned DispatchWidth; + unsigned AvailableEntries; + unsigned CarryOver; + const llvm::MCSubtargetInfo &STI; + RetireControlUnit &RCU; + RegisterFile &PRF; + + bool checkRCU(const InstRef &IR) const; + bool checkPRF(const InstRef &IR) const; + bool canDispatch(const InstRef &IR) const; + llvm::Error dispatch(InstRef IR); + + void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI); + + void notifyInstructionDispatched(const InstRef &IR, + llvm::ArrayRef UsedPhysRegs); + + void collectWrites(llvm::SmallVectorImpl &Vec, + unsigned RegID) const { + return PRF.collectWrites(Vec, RegID); + } + +public: + DispatchStage(const llvm::MCSubtargetInfo &Subtarget, + const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize, + unsigned MaxDispatchWidth, RetireControlUnit &R, + RegisterFile &F) + : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), + CarryOver(0U), STI(Subtarget), RCU(R), PRF(F) {} + + bool isAvailable(const InstRef &IR) const override; + + // The dispatch logic internally doesn't buffer instructions. So there is + // never work to do at the beginning of every cycle. + bool hasWorkToComplete() const override { return false; } + llvm::Error cycleStart() override; + llvm::Error execute(InstRef &IR) override; + +#ifndef NDEBUG + void dump() const; +#endif +}; +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_DISPATCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/ExecuteStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/ExecuteStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/ExecuteStage.h @@ -0,0 +1,78 @@ +//===---------------------- ExecuteStage.h ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the execution stage of a default instruction pipeline. +/// +/// The ExecuteStage is responsible for managing the hardware scheduler +/// and issuing notifications that an instruction has been executed. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H +#define LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H + +#include "HardwareUnits/Scheduler.h" +#include "Instruction.h" +#include "Stages/Stage.h" +#include "llvm/ADT/ArrayRef.h" + +namespace mca { + +class ExecuteStage final : public Stage { + Scheduler &HWS; + + llvm::Error issueInstruction(InstRef &IR); + + // Called at the beginning of each cycle to issue already dispatched + // instructions to the underlying pipelines. + llvm::Error issueReadyInstructions(); + + ExecuteStage(const ExecuteStage &Other) = delete; + ExecuteStage &operator=(const ExecuteStage &Other) = delete; + +public: + ExecuteStage(Scheduler &S) : Stage(), HWS(S) {} + + // This stage works under the assumption that the Pipeline will eventually + // execute a retire stage. We don't need to check if pipelines and/or + // schedulers have instructions to process, because those instructions are + // also tracked by the retire control unit. That means, + // RetireControlUnit::hasWorkToComplete() is responsible for checking if there + // are still instructions in-flight in the out-of-order backend. + bool hasWorkToComplete() const override { return false; } + bool isAvailable(const InstRef &IR) const override; + + // Notifies the scheduler that a new cycle just started. + // + // This method notifies the scheduler that a new cycle started. + // This method is also responsible for notifying listeners about instructions + // state changes, and processor resources freed by the scheduler. 
+ // Instructions that transitioned to the 'Executed' state are automatically + // moved to the next stage (i.e. RetireStage). + llvm::Error cycleStart() override; + llvm::Error execute(InstRef &IR) override; + + void + notifyInstructionIssued(const InstRef &IR, + llvm::ArrayRef> Used); + void notifyInstructionExecuted(const InstRef &IR); + void notifyInstructionReady(const InstRef &IR); + void notifyResourceAvailable(const ResourceRef &RR); + + // Notify listeners that buffered resources were consumed. + void notifyReservedBuffers(llvm::ArrayRef Buffers); + + // Notify listeners that buffered resources were freed. + void notifyReleasedBuffers(llvm::ArrayRef Buffers); +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/FetchStage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/FetchStage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/FetchStage.h @@ -0,0 +1,52 @@ +//===---------------------- FetchStage.h ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the Fetch stage of an instruction pipeline. Its sole +/// purpose in life is to produce instructions for the rest of the pipeline. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H +#define LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H + +#include "InstrBuilder.h" +#include "SourceMgr.h" +#include "Stages/Stage.h" +#include + +namespace mca { + +class FetchStage final : public Stage { + std::unique_ptr CurrentInstruction; + using InstMap = std::map>; + InstMap Instructions; + InstrBuilder &IB; + SourceMgr &SM; + + // Updates the program counter, and sets 'CurrentInstruction'. + llvm::Error getNextInstruction(); + + FetchStage(const FetchStage &Other) = delete; + FetchStage &operator=(const FetchStage &Other) = delete; + +public: + FetchStage(InstrBuilder &IB, SourceMgr &SM) + : CurrentInstruction(), IB(IB), SM(SM) {} + + bool isAvailable(const InstRef &IR) const override; + bool hasWorkToComplete() const override; + llvm::Error execute(InstRef &IR) override; + llvm::Error cycleStart() override; + llvm::Error cycleEnd() override; +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_FETCH_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/InstructionTables.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/InstructionTables.h +++ llvm/trunk/tools/llvm-mca/include/Stages/InstructionTables.h @@ -0,0 +1,42 @@ +//===--------------------- InstructionTables.h ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements a custom stage to generate instruction tables. 
+/// See the description of command-line flag -instruction-tables in
+/// docs/CommandGuide/llvm-mca.rst
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H
+#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONTABLES_H
+
+#include "HardwareUnits/Scheduler.h"
+#include "InstrBuilder.h"
+#include "Stages/Stage.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSchedule.h"
+
+namespace mca {
+
+class InstructionTables final : public Stage {
+  const llvm::MCSchedModel &SM;
+  InstrBuilder &IB;
+  llvm::SmallVector, 4> UsedResources;
+
+public:
+  InstructionTables(const llvm::MCSchedModel &Model, InstrBuilder &Builder)
+      : Stage(), SM(Model), IB(Builder) {}
+
+  bool hasWorkToComplete() const override { return false; }
+  llvm::Error execute(InstRef &IR) override;
+};
+} // namespace mca
+
+#endif
Index: llvm/trunk/tools/llvm-mca/include/Stages/RetireStage.h
===================================================================
--- llvm/trunk/tools/llvm-mca/include/Stages/RetireStage.h
+++ llvm/trunk/tools/llvm-mca/include/Stages/RetireStage.h
@@ -0,0 +1,46 @@
+//===---------------------- RetireStage.h -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the retire stage of a default instruction pipeline.
+/// The RetireStage represents the process logic that interacts with the
+/// simulated RetireControlUnit hardware.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H +#define LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H + +#include "HardwareUnits/RegisterFile.h" +#include "HardwareUnits/RetireControlUnit.h" +#include "Stages/Stage.h" + +namespace mca { + +class RetireStage final : public Stage { + // Owner will go away when we move listeners/eventing to the stages. + RetireControlUnit &RCU; + RegisterFile &PRF; + + RetireStage(const RetireStage &Other) = delete; + RetireStage &operator=(const RetireStage &Other) = delete; + +public: + RetireStage(RetireControlUnit &R, RegisterFile &F) + : Stage(), RCU(R), PRF(F) {} + + bool hasWorkToComplete() const override { return !RCU.isEmpty(); } + llvm::Error cycleStart() override; + llvm::Error execute(InstRef &IR) override; + void notifyInstructionRetired(const InstRef &IR); +}; + +} // namespace mca + +#endif // LLVM_TOOLS_LLVM_MCA_RETIRE_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Stages/Stage.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Stages/Stage.h +++ llvm/trunk/tools/llvm-mca/include/Stages/Stage.h @@ -0,0 +1,86 @@ +//===---------------------- Stage.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a stage. +/// A chain of stages compose an instruction pipeline. 
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_MCA_STAGE_H
+#define LLVM_TOOLS_LLVM_MCA_STAGE_H
+
+#include "HWEventListener.h"
+#include "llvm/Support/Error.h"
+#include <set>
+
+namespace mca {
+
+class InstRef;
+
+class Stage {
+  Stage *NextInSequence;
+  std::set<HWEventListener *> Listeners;
+
+  Stage(const Stage &Other) = delete;
+  Stage &operator=(const Stage &Other) = delete;
+
+protected:
+  const std::set<HWEventListener *> &getListeners() const { return Listeners; }
+
+public:
+  Stage() : NextInSequence(nullptr) {}
+  virtual ~Stage();
+
+  /// Returns true if it can execute IR during this cycle.
+  virtual bool isAvailable(const InstRef &IR) const { return true; }
+
+  /// Returns true if some instructions are still executing this stage.
+  virtual bool hasWorkToComplete() const = 0;
+
+  /// Called once at the start of each cycle. This can be used as a setup
+  /// phase to prepare for the executions during the cycle.
+  virtual llvm::Error cycleStart() { return llvm::ErrorSuccess(); }
+
+  /// Called once at the end of each cycle.
+  virtual llvm::Error cycleEnd() { return llvm::ErrorSuccess(); }
+
+  /// The primary action that this stage performs on instruction IR.
+  virtual llvm::Error execute(InstRef &IR) = 0;
+
+  void setNextInSequence(Stage *NextStage) {
+    assert(!NextInSequence && "This stage already has a NextInSequence!");
+    NextInSequence = NextStage;
+  }
+
+  bool checkNextStage(const InstRef &IR) const {
+    return NextInSequence && NextInSequence->isAvailable(IR);
+  }
+
+  /// Called when an instruction is ready to move to the next pipeline stage.
+  ///
+  /// Stages are responsible for moving instructions to their immediate
+  /// successor stages.
+  llvm::Error moveToTheNextStage(InstRef &IR) {
+    assert(checkNextStage(IR) && "Next stage is not ready!");
+    return NextInSequence->execute(IR);
+  }
+
+  /// Add a listener to receive callbacks during the execution of this stage.
+ void addListener(HWEventListener *Listener); + + /// Notify listeners of a particular hardware event. + template void notifyEvent(const EventT &Event) const { + for (HWEventListener *Listener : Listeners) + Listener->onEvent(Event); + } +}; + +} // namespace mca +#endif // LLVM_TOOLS_LLVM_MCA_STAGE_H Index: llvm/trunk/tools/llvm-mca/include/Support.h =================================================================== --- llvm/trunk/tools/llvm-mca/include/Support.h +++ llvm/trunk/tools/llvm-mca/include/Support.h @@ -0,0 +1,58 @@ +//===--------------------- Support.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Helper functions used by various pipeline components. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCA_SUPPORT_H +#define LLVM_TOOLS_LLVM_MCA_SUPPORT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCSchedule.h" + +namespace mca { + +/// Populates vector Masks with processor resource masks. +/// +/// The number of bits set in a mask depends on the processor resource type. +/// Each processor resource mask has at least one bit set. For groups, the +/// number of bits set in the mask is equal to the cardinality of the group plus +/// one. Excluding the most significant bit, the remaining bits in the mask +/// identify processor resources that are part of the group. +/// +/// Example: +/// +/// ResourceA -- Mask: 0b001 +/// ResourceB -- Mask: 0b010 +/// ResourceAB -- Mask: 0b100 U (ResourceA::Mask | ResourceB::Mask) == 0b111 +/// +/// ResourceAB is a processor resource group containing ResourceA and ResourceB. 
+/// Each resource mask uniquely identifies a resource; both ResourceA and +/// ResourceB only have one bit set. +/// ResourceAB is a group; excluding the most significant bit in the mask, the +/// remaining bits identify the composition of the group. +/// +/// Resource masks are used by the ResourceManager to solve set membership +/// problems with simple bit manipulation operations. +void computeProcResourceMasks(const llvm::MCSchedModel &SM, + llvm::SmallVectorImpl &Masks); + +/// Compute the reciprocal block throughput from a set of processor resource +/// cycles. The reciprocal block throughput is computed as the MAX between: +/// - NumMicroOps / DispatchWidth +/// - ProcResourceCycles / #ProcResourceUnits (for every consumed resource). +double computeBlockRThroughput(const llvm::MCSchedModel &SM, + unsigned DispatchWidth, unsigned NumMicroOps, + llvm::ArrayRef ProcResourceUsage); +} // namespace mca + +#endif Index: llvm/trunk/tools/llvm-mca/lib/CMakeLists.txt =================================================================== --- llvm/trunk/tools/llvm-mca/lib/CMakeLists.txt +++ llvm/trunk/tools/llvm-mca/lib/CMakeLists.txt @@ -0,0 +1,33 @@ +include_directories(${LLVM_MCA_SOURCE_DIR}/include) + +add_library(LLVMMCA + STATIC + Context.cpp + HWEventListener.cpp + HardwareUnits/HardwareUnit.cpp + HardwareUnits/LSUnit.cpp + HardwareUnits/RegisterFile.cpp + HardwareUnits/ResourceManager.cpp + HardwareUnits/RetireControlUnit.cpp + HardwareUnits/Scheduler.cpp + InstrBuilder.cpp + Instruction.cpp + Pipeline.cpp + Stages/DispatchStage.cpp + Stages/ExecuteStage.cpp + Stages/FetchStage.cpp + Stages/InstructionTables.cpp + Stages/RetireStage.cpp + Stages/Stage.cpp + Support.cpp + ) + +llvm_update_compile_flags(LLVMMCA) +llvm_map_components_to_libnames(libs + CodeGen + MC + Support + ) + +target_link_libraries(LLVMMCA ${libs}) +set_target_properties(LLVMMCA PROPERTIES FOLDER "Libraries") Index: llvm/trunk/tools/llvm-mca/lib/Context.cpp 
=================================================================== --- llvm/trunk/tools/llvm-mca/lib/Context.cpp +++ llvm/trunk/tools/llvm-mca/lib/Context.cpp @@ -0,0 +1,65 @@ +//===---------------------------- Context.cpp -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a class for holding ownership of various simulated +/// hardware units. A Context also provides a utility routine for constructing +/// a default out-of-order pipeline with fetch, dispatch, execute, and retire +/// stages. +/// +//===----------------------------------------------------------------------===// + +#include "Context.h" +#include "HardwareUnits/RegisterFile.h" +#include "HardwareUnits/RetireControlUnit.h" +#include "HardwareUnits/Scheduler.h" +#include "Stages/DispatchStage.h" +#include "Stages/ExecuteStage.h" +#include "Stages/FetchStage.h" +#include "Stages/RetireStage.h" + +namespace mca { + +using namespace llvm; + +std::unique_ptr +Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, + SourceMgr &SrcMgr) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Create the hardware units defining the backend. + auto RCU = llvm::make_unique(SM); + auto PRF = llvm::make_unique(SM, MRI, Opts.RegisterFileSize); + auto LSU = llvm::make_unique(Opts.LoadQueueSize, Opts.StoreQueueSize, + Opts.AssumeNoAlias); + auto HWS = llvm::make_unique(SM, LSU.get()); + + // Create the pipeline and its stages. 
+ auto StagePipeline = llvm::make_unique(); + auto Fetch = llvm::make_unique(IB, SrcMgr); + auto Dispatch = llvm::make_unique( + STI, MRI, Opts.RegisterFileSize, Opts.DispatchWidth, *RCU, *PRF); + auto Execute = llvm::make_unique(*HWS); + auto Retire = llvm::make_unique(*RCU, *PRF); + + // Pass the ownership of all the hardware units to this Context. + addHardwareUnit(std::move(RCU)); + addHardwareUnit(std::move(PRF)); + addHardwareUnit(std::move(LSU)); + addHardwareUnit(std::move(HWS)); + + // Build the pipeline. + StagePipeline->appendStage(std::move(Fetch)); + StagePipeline->appendStage(std::move(Dispatch)); + StagePipeline->appendStage(std::move(Execute)); + StagePipeline->appendStage(std::move(Retire)); + return StagePipeline; +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HWEventListener.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HWEventListener.cpp +++ llvm/trunk/tools/llvm-mca/lib/HWEventListener.cpp @@ -0,0 +1,21 @@ +//===----------------------- HWEventListener.cpp ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a vtable anchor for class HWEventListener. +/// +//===----------------------------------------------------------------------===// + +#include "HWEventListener.h" + +namespace mca { + +// Anchor the vtable here. 
+void HWEventListener::anchor() {} +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/HardwareUnit.cpp @@ -0,0 +1,23 @@ +//===------------------------- HardwareUnit.cpp -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the anchor for the base class that describes +/// simulated hardware units. +/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/HardwareUnit.h" + +namespace mca { + +// Pin the vtable with this method. +HardwareUnit::~HardwareUnit() = default; + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/LSUnit.cpp @@ -0,0 +1,156 @@ +//===----------------------- LSUnit.cpp --------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// A Load-Store Unit for the llvm-mca tool. 
+/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/LSUnit.h" +#include "Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +#ifndef NDEBUG +void LSUnit::dump() const { + dbgs() << "[LSUnit] LQ_Size = " << LQ_Size << '\n'; + dbgs() << "[LSUnit] SQ_Size = " << SQ_Size << '\n'; + dbgs() << "[LSUnit] NextLQSlotIdx = " << LoadQueue.size() << '\n'; + dbgs() << "[LSUnit] NextSQSlotIdx = " << StoreQueue.size() << '\n'; +} +#endif + +void LSUnit::assignLQSlot(unsigned Index) { + assert(!isLQFull()); + assert(LoadQueue.count(Index) == 0); + + LLVM_DEBUG(dbgs() << "[LSUnit] - AssignLQSlot \n"); + LoadQueue.insert(Index); +} + +void LSUnit::assignSQSlot(unsigned Index) { + assert(!isSQFull()); + assert(StoreQueue.count(Index) == 0); + + LLVM_DEBUG(dbgs() << "[LSUnit] - AssignSQSlot \n"); + StoreQueue.insert(Index); +} + +void LSUnit::dispatch(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned IsMemBarrier = Desc.HasSideEffects; + assert((Desc.MayLoad || Desc.MayStore) && "Not a memory operation!"); + + const unsigned Index = IR.getSourceIndex(); + if (Desc.MayLoad) { + if (IsMemBarrier) + LoadBarriers.insert(Index); + assignLQSlot(Index); + } + + if (Desc.MayStore) { + if (IsMemBarrier) + StoreBarriers.insert(Index); + assignSQSlot(Index); + } +} + +LSUnit::Status LSUnit::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + if (Desc.MayLoad && isLQFull()) + return LSUnit::LSU_LQUEUE_FULL; + if (Desc.MayStore && isSQFull()) + return LSUnit::LSU_SQUEUE_FULL; + return LSUnit::LSU_AVAILABLE; +} + +bool LSUnit::isReady(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + const unsigned Index = IR.getSourceIndex(); + bool IsALoad = Desc.MayLoad; + bool IsAStore = Desc.MayStore; + 
assert((IsALoad || IsAStore) && "Not a memory operation!");
+  assert((!IsALoad || LoadQueue.count(Index) == 1) && "Load not in queue!");
+  assert((!IsAStore || StoreQueue.count(Index) == 1) && "Store not in queue!");
+
+  if (IsALoad && !LoadBarriers.empty()) {
+    unsigned LoadBarrierIndex = *LoadBarriers.begin();
+    if (Index > LoadBarrierIndex)
+      return false;
+    if (Index == LoadBarrierIndex && Index != *LoadQueue.begin())
+      return false;
+  }
+
+  if (IsAStore && !StoreBarriers.empty()) {
+    unsigned StoreBarrierIndex = *StoreBarriers.begin();
+    if (Index > StoreBarrierIndex)
+      return false;
+    if (Index == StoreBarrierIndex && Index != *StoreQueue.begin())
+      return false;
+  }
+
+  if (NoAlias && IsALoad)
+    return true;
+
+  if (StoreQueue.size()) {
+    // Check if this memory operation is younger than the oldest store.
+    if (Index > *StoreQueue.begin())
+      return false;
+  }
+
+  // Okay, we are older than the oldest store in the queue.
+  // If there are no pending loads, then we can say for sure that this
+  // instruction is ready.
+  if (isLQEmpty())
+    return true;
+
+  // Check if there are no older loads.
+  if (Index <= *LoadQueue.begin())
+    return true;
+
+  // There is at least one younger load.
+ return !IsAStore; +} + +void LSUnit::onInstructionExecuted(const InstRef &IR) { + const unsigned Index = IR.getSourceIndex(); + std::set::iterator it = LoadQueue.find(Index); + if (it != LoadQueue.end()) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the load queue.\n"); + LoadQueue.erase(it); + } + + it = StoreQueue.find(Index); + if (it != StoreQueue.end()) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the store queue.\n"); + StoreQueue.erase(it); + } + + if (!StoreBarriers.empty() && Index == *StoreBarriers.begin()) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the set of store barriers.\n"); + StoreBarriers.erase(StoreBarriers.begin()); + } + if (!LoadBarriers.empty() && Index == *LoadBarriers.begin()) { + LLVM_DEBUG(dbgs() << "[LSUnit]: Instruction idx=" << Index + << " has been removed from the set of load barriers.\n"); + LoadBarriers.erase(LoadBarriers.begin()); + } +} +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RegisterFile.cpp @@ -0,0 +1,350 @@ +//===--------------------- RegisterFile.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a register mapping file class. This class is responsible +/// for managing hardware register files and the tracking of data dependencies +/// between registers. 
+/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/RegisterFile.h" +#include "Instruction.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +RegisterFile::RegisterFile(const llvm::MCSchedModel &SM, + const llvm::MCRegisterInfo &mri, unsigned NumRegs) + : MRI(mri), RegisterMappings(mri.getNumRegs(), + {WriteRef(), {IndexPlusCostPairTy(0, 1), 0}}) { + initialize(SM, NumRegs); +} + +void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { + // Create a default register file that "sees" all the machine registers + // declared by the target. The number of physical registers in the default + // register file is set equal to `NumRegs`. A value of zero for `NumRegs` + // means: this register file has an unbounded number of physical registers. + addRegisterFile({} /* all registers */, NumRegs); + if (!SM.hasExtraProcessorInfo()) + return; + + // For each user defined register file, allocate a RegisterMappingTracker + // object. The size of every register file, as well as the mapping between + // register files and register classes is specified via tablegen. + const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); + for (unsigned I = 0, E = Info.NumRegisterFiles; I < E; ++I) { + const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; + // Skip invalid register files with zero physical registers. + unsigned Length = RF.NumRegisterCostEntries; + if (!RF.NumPhysRegs) + continue; + // The cost of a register definition is equivalent to the number of + // physical registers that are allocated at register renaming stage. + const MCRegisterCostEntry *FirstElt = + &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; + addRegisterFile(ArrayRef(FirstElt, Length), + RF.NumPhysRegs); + } +} + +void RegisterFile::addRegisterFile(ArrayRef Entries, + unsigned NumPhysRegs) { + // A default register file is always allocated at index #0. 
That register file + // is mainly used to count the total number of mappings created by all + // register files at runtime. Users can limit the number of available physical + // registers in register file #0 through the command line flag + // `-register-file-size`. + unsigned RegisterFileIndex = RegisterFiles.size(); + RegisterFiles.emplace_back(NumPhysRegs); + + // Special case where there is no register class identifier in the set. + // An empty set of register classes means: this register file contains all + // the physical registers specified by the target. + // We optimistically assume that a register can be renamed at the cost of a + // single physical register. The constructor of RegisterFile ensures that + // a RegisterMapping exists for each logical register defined by the Target. + if (Entries.empty()) + return; + + // Now update the cost of individual registers. + for (const MCRegisterCostEntry &RCE : Entries) { + const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); + for (const MCPhysReg Reg : RC) { + RegisterRenamingInfo &Entry = RegisterMappings[Reg].second; + IndexPlusCostPairTy &IPC = Entry.IndexPlusCost; + if (IPC.first && IPC.first != RegisterFileIndex) { + // The only register file that is allowed to overlap is the default + // register file at index #0. The analysis is inaccurate if register + // files overlap. + errs() << "warning: register " << MRI.getName(Reg) + << " defined in multiple register files."; + } + IPC = std::make_pair(RegisterFileIndex, RCE.Cost); + Entry.RenameAs = Reg; + + // Assume the same cost for each sub-register. 
+ for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) { + RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second; + if (!OtherEntry.IndexPlusCost.first && + (!OtherEntry.RenameAs || + MRI.isSuperRegister(*I, OtherEntry.RenameAs))) { + OtherEntry.IndexPlusCost = IPC; + OtherEntry.RenameAs = Reg; + } + } + } + } +} + +void RegisterFile::allocatePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef UsedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs += Cost; + UsedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. + RegisterFiles[0].NumUsedPhysRegs += Cost; + UsedPhysRegs[0] += Cost; +} + +void RegisterFile::freePhysRegs(const RegisterRenamingInfo &Entry, + MutableArrayRef FreedPhysRegs) { + unsigned RegisterFileIndex = Entry.IndexPlusCost.first; + unsigned Cost = Entry.IndexPlusCost.second; + if (RegisterFileIndex) { + RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; + RMT.NumUsedPhysRegs -= Cost; + FreedPhysRegs[RegisterFileIndex] += Cost; + } + + // Now update the default register mapping tracker. + RegisterFiles[0].NumUsedPhysRegs -= Cost; + FreedPhysRegs[0] += Cost; +} + +void RegisterFile::addRegisterWrite(WriteRef Write, + MutableArrayRef UsedPhysRegs, + bool ShouldAllocatePhysRegs) { + WriteState &WS = *Write.getWriteState(); + unsigned RegID = WS.getRegisterID(); + assert(RegID && "Adding an invalid register definition?"); + + LLVM_DEBUG({ + dbgs() << "RegisterFile: addRegisterWrite [ " << Write.getSourceIndex() + << ", " << MRI.getName(RegID) << "]\n"; + }); + + // If RenameAs is equal to RegID, then RegID is subject to register renaming + // and false dependencies on RegID are all eliminated. 
+ + // If RenameAs references the invalid register, then we optimistically assume + // that it can be renamed. In the absence of tablegen descriptors for register + // files, RenameAs is always set to the invalid register ID. In all other + // cases, RenameAs must be either equal to RegID, or it must reference a + // super-register of RegID. + + // If RenameAs is a super-register of RegID, then a write to RegID has always + // a false dependency on RenameAs. The only exception is for when the write + // implicitly clears the upper portion of the underlying register. + // If a write clears its super-registers, then it is renamed as `RenameAs`. + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + if (RRI.RenameAs && RRI.RenameAs != RegID) { + RegID = RRI.RenameAs; + WriteRef &OtherWrite = RegisterMappings[RegID].first; + + if (!WS.clearsSuperRegisters()) { + // The processor keeps the definition of `RegID` together with register + // `RenameAs`. Since this partial write is not renamed, no physical + // register is allocated. + ShouldAllocatePhysRegs = false; + + if (OtherWrite.getWriteState() && + (OtherWrite.getSourceIndex() != Write.getSourceIndex())) { + // This partial write has a false dependency on RenameAs. + WS.setDependentWrite(OtherWrite.getWriteState()); + } + } + } + + // Update the mapping for register RegID including its sub-registers. + RegisterMappings[RegID].first = Write; + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) + RegisterMappings[*I].first = Write; + + // No physical registers are allocated for instructions that are optimized in + // hardware. For example, zero-latency data-dependency breaking instructions + // don't consume physical registers. 
+ if (ShouldAllocatePhysRegs) + allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs); + + if (!WS.clearsSuperRegisters()) + return; + + for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) + RegisterMappings[*I].first = Write; +} + +void RegisterFile::removeRegisterWrite(const WriteState &WS, + MutableArrayRef FreedPhysRegs, + bool ShouldFreePhysRegs) { + unsigned RegID = WS.getRegisterID(); + + assert(RegID != 0 && "Invalidating an already invalid register?"); + assert(WS.getCyclesLeft() != UNKNOWN_CYCLES && + "Invalidating a write of unknown cycles!"); + assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); + + unsigned RenameAs = RegisterMappings[RegID].second.RenameAs; + if (RenameAs && RenameAs != RegID) { + RegID = RenameAs; + + if (!WS.clearsSuperRegisters()) { + // Keep the definition of `RegID` together with register `RenameAs`. + ShouldFreePhysRegs = false; + } + } + + if (ShouldFreePhysRegs) + freePhysRegs(RegisterMappings[RegID].second, FreedPhysRegs); + + WriteRef &WR = RegisterMappings[RegID].first; + if (WR.getWriteState() == &WS) + WR.invalidate(); + + for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } + + if (!WS.clearsSuperRegisters()) + return; + + for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) { + WriteRef &OtherWR = RegisterMappings[*I].first; + if (OtherWR.getWriteState() == &WS) + OtherWR.invalidate(); + } +} + +void RegisterFile::collectWrites(SmallVectorImpl &Writes, + unsigned RegID) const { + assert(RegID && RegID < RegisterMappings.size()); + LLVM_DEBUG(dbgs() << "RegisterFile: collecting writes for register " + << MRI.getName(RegID) << '\n'); + const WriteRef &WR = RegisterMappings[RegID].first; + if (WR.isValid()) + Writes.push_back(WR); + + // Handle potential partial register updates. 
+ for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { + const WriteRef &WR = RegisterMappings[*I].first; + if (WR.isValid()) + Writes.push_back(WR); + } + + // Remove duplicate entries and resize the input vector. + llvm::sort(Writes.begin(), Writes.end(), + [](const WriteRef &Lhs, const WriteRef &Rhs) { + return Lhs.getWriteState() < Rhs.getWriteState(); + }); + auto It = std::unique(Writes.begin(), Writes.end()); + Writes.resize(std::distance(Writes.begin(), It)); + + LLVM_DEBUG({ + for (const WriteRef &WR : Writes) { + const WriteState &WS = *WR.getWriteState(); + dbgs() << "[PRF] Found a dependent use of Register " + << MRI.getName(WS.getRegisterID()) << " (defined by intruction #" + << WR.getSourceIndex() << ")\n"; + } + }); +} + +unsigned RegisterFile::isAvailable(ArrayRef Regs) const { + SmallVector NumPhysRegs(getNumRegisterFiles()); + + // Find how many new mappings must be created for each register file. + for (const unsigned RegID : Regs) { + const RegisterRenamingInfo &RRI = RegisterMappings[RegID].second; + const IndexPlusCostPairTy &Entry = RRI.IndexPlusCost; + if (Entry.first) + NumPhysRegs[Entry.first] += Entry.second; + NumPhysRegs[0] += Entry.second; + } + + unsigned Response = 0; + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + unsigned NumRegs = NumPhysRegs[I]; + if (!NumRegs) + continue; + + const RegisterMappingTracker &RMT = RegisterFiles[I]; + if (!RMT.NumPhysRegs) { + // The register file has an unbounded number of microarchitectural + // registers. + continue; + } + + if (RMT.NumPhysRegs < NumRegs) { + // The current register file is too small. This may occur if the number of + // microarchitectural registers in register file #0 was changed by the + // users via flag -reg-file-size. Alternatively, the scheduling model + // specified a too small number of registers for this register file. 
+ LLVM_DEBUG(dbgs() << "Not enough registers in the register file.\n"); + + // FIXME: Normalize the instruction register count to match the + // NumPhysRegs value. This is a highly unusual case, and is not expected + // to occur. This normalization is hiding an inconsistency in either the + // scheduling model or in the value that the user might have specified + // for NumPhysRegs. + NumRegs = RMT.NumPhysRegs; + } + + if (RMT.NumPhysRegs < (RMT.NumUsedPhysRegs + NumRegs)) + Response |= (1U << I); + } + + return Response; +} + +#ifndef NDEBUG +void RegisterFile::dump() const { + for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { + const RegisterMapping &RM = RegisterMappings[I]; + if (!RM.first.getWriteState()) + continue; + const RegisterRenamingInfo &RRI = RM.second; + dbgs() << MRI.getName(I) << ", " << I << ", PRF=" << RRI.IndexPlusCost.first + << ", Cost=" << RRI.IndexPlusCost.second + << ", RenameAs=" << RRI.RenameAs << ", "; + RM.first.dump(); + dbgs() << '\n'; + } + + for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { + dbgs() << "Register File #" << I; + const RegisterMappingTracker &RMT = RegisterFiles[I]; + dbgs() << "\n TotalMappings: " << RMT.NumPhysRegs + << "\n NumUsedMappings: " << RMT.NumUsedPhysRegs << '\n'; + } +} +#endif + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/ResourceManager.cpp @@ -0,0 +1,309 @@ +//===--------------------- ResourceManager.cpp ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The classes here represent processor resource units and their management +/// strategy. These classes are managed by the Scheduler. +/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/ResourceManager.h" +#include "Support.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace mca { + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" +ResourceStrategy::~ResourceStrategy() = default; + +void DefaultResourceStrategy::skipMask(uint64_t Mask) { + NextInSequenceMask &= (~Mask); + if (!NextInSequenceMask) { + NextInSequenceMask = ResourceUnitMask ^ RemovedFromNextInSequence; + RemovedFromNextInSequence = 0; + } +} + +uint64_t DefaultResourceStrategy::select(uint64_t ReadyMask) { + // This method assumes that ReadyMask cannot be zero. + uint64_t CandidateMask = llvm::PowerOf2Floor(NextInSequenceMask); + while (!(ReadyMask & CandidateMask)) { + skipMask(CandidateMask); + CandidateMask = llvm::PowerOf2Floor(NextInSequenceMask); + } + return CandidateMask; +} + +void DefaultResourceStrategy::used(uint64_t Mask) { + if (Mask > NextInSequenceMask) { + RemovedFromNextInSequence |= Mask; + return; + } + skipMask(Mask); +} + +ResourceState::ResourceState(const MCProcResourceDesc &Desc, unsigned Index, + uint64_t Mask) + : ProcResourceDescIndex(Index), ResourceMask(Mask), + BufferSize(Desc.BufferSize) { + if (llvm::countPopulation(ResourceMask) > 1) + ResourceSizeMask = ResourceMask ^ llvm::PowerOf2Floor(ResourceMask); + else + ResourceSizeMask = (1ULL << Desc.NumUnits) - 1; + ReadyMask = ResourceSizeMask; + AvailableSlots = BufferSize == -1 ? 
0U : static_cast<unsigned>(BufferSize);
+  Unavailable = false;
+}
+
+bool ResourceState::isReady(unsigned NumUnits) const {
+  return (!isReserved() || isADispatchHazard()) &&
+         llvm::countPopulation(ReadyMask) >= NumUnits;
+}
+
+ResourceStateEvent ResourceState::isBufferAvailable() const {
+  if (isADispatchHazard() && isReserved())
+    return RS_RESERVED;
+  if (!isBuffered() || AvailableSlots)
+    return RS_BUFFER_AVAILABLE;
+  return RS_BUFFER_UNAVAILABLE;
+}
+
+#ifndef NDEBUG
+void ResourceState::dump() const {
+  dbgs() << "MASK: " << ResourceMask << ", SIZE_MASK: " << ResourceSizeMask
+         << ", RDYMASK: " << ReadyMask << ", BufferSize=" << BufferSize
+         << ", AvailableSlots=" << AvailableSlots
+         << ", Reserved=" << Unavailable << '\n';
+}
+#endif
+
+static unsigned getResourceStateIndex(uint64_t Mask) {
+  return std::numeric_limits<uint64_t>::digits - llvm::countLeadingZeros(Mask);
+}
+
+static std::unique_ptr<ResourceStrategy>
+getStrategyFor(const ResourceState &RS) {
+  if (RS.isAResourceGroup() || RS.getNumUnits() > 1)
+    return llvm::make_unique<DefaultResourceStrategy>(RS.getReadyMask());
+  return std::unique_ptr<ResourceStrategy>(nullptr);
+}
+
+ResourceManager::ResourceManager(const MCSchedModel &SM)
+    : ProcResID2Mask(SM.getNumProcResourceKinds()) {
+  computeProcResourceMasks(SM, ProcResID2Mask);
+  Resources.resize(SM.getNumProcResourceKinds());
+  Strategies.resize(SM.getNumProcResourceKinds());
+
+  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    uint64_t Mask = ProcResID2Mask[I];
+    unsigned Index = getResourceStateIndex(Mask);
+    Resources[Index] =
+        llvm::make_unique<ResourceState>(*SM.getProcResource(I), I, Mask);
+    Strategies[Index] = getStrategyFor(*Resources[Index]);
+  }
+}
+
+void ResourceManager::setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S,
+                                            uint64_t ResourceMask) {
+  unsigned Index = getResourceStateIndex(ResourceMask);
+  assert(Index < Resources.size() && "Invalid processor resource index!");
+  assert(S && "Unexpected null strategy in input!");
+  Strategies[Index] = std::move(S);
+}
+
+unsigned
ResourceManager::resolveResourceMask(uint64_t Mask) const { + return Resources[getResourceStateIndex(Mask)]->getProcResourceID(); +} + +unsigned ResourceManager::getNumUnits(uint64_t ResourceID) const { + return Resources[getResourceStateIndex(ResourceID)]->getNumUnits(); +} + +// Returns the actual resource consumed by this Use. +// First, is the primary resource ID. +// Second, is the specific sub-resource ID. +ResourceRef ResourceManager::selectPipe(uint64_t ResourceID) { + unsigned Index = getResourceStateIndex(ResourceID); + ResourceState &RS = *Resources[Index]; + assert(RS.isReady() && "No available units to select!"); + + // Special case where RS is not a group, and it only declares a single + // resource unit. + if (!RS.isAResourceGroup() && RS.getNumUnits() == 1) + return std::make_pair(ResourceID, RS.getReadyMask()); + + uint64_t SubResourceID = Strategies[Index]->select(RS.getReadyMask()); + if (RS.isAResourceGroup()) + return selectPipe(SubResourceID); + return std::make_pair(ResourceID, SubResourceID); +} + +void ResourceManager::use(const ResourceRef &RR) { + // Mark the sub-resource referenced by RR as used. + ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; + RS.markSubResourceAsUsed(RR.second); + // If there are still available units in RR.first, + // then we are done. + if (RS.isReady()) + return; + + // Notify to other resources that RR.first is no longer available. 
+ for (std::unique_ptr &Res : Resources) { + ResourceState &Current = *Res; + if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) + continue; + + if (Current.containsResource(RR.first)) { + unsigned Index = getResourceStateIndex(Current.getResourceMask()); + Current.markSubResourceAsUsed(RR.first); + Strategies[Index]->used(RR.first); + } + } +} + +void ResourceManager::release(const ResourceRef &RR) { + ResourceState &RS = *Resources[getResourceStateIndex(RR.first)]; + bool WasFullyUsed = !RS.isReady(); + RS.releaseSubResource(RR.second); + if (!WasFullyUsed) + return; + + for (std::unique_ptr &Res : Resources) { + ResourceState &Current = *Res; + if (!Current.isAResourceGroup() || Current.getResourceMask() == RR.first) + continue; + + if (Current.containsResource(RR.first)) + Current.releaseSubResource(RR.first); + } +} + +ResourceStateEvent +ResourceManager::canBeDispatched(ArrayRef Buffers) const { + ResourceStateEvent Result = ResourceStateEvent::RS_BUFFER_AVAILABLE; + for (uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + Result = RS.isBufferAvailable(); + if (Result != ResourceStateEvent::RS_BUFFER_AVAILABLE) + break; + } + return Result; +} + +void ResourceManager::reserveBuffers(ArrayRef Buffers) { + for (const uint64_t Buffer : Buffers) { + ResourceState &RS = *Resources[getResourceStateIndex(Buffer)]; + assert(RS.isBufferAvailable() == ResourceStateEvent::RS_BUFFER_AVAILABLE); + RS.reserveBuffer(); + + if (RS.isADispatchHazard()) { + assert(!RS.isReserved()); + RS.setReserved(); + } + } +} + +void ResourceManager::releaseBuffers(ArrayRef Buffers) { + for (const uint64_t R : Buffers) + Resources[getResourceStateIndex(R)]->releaseBuffer(); +} + +bool ResourceManager::canBeIssued(const InstrDesc &Desc) const { + return std::all_of(Desc.Resources.begin(), Desc.Resources.end(), + [&](const std::pair &E) { + unsigned NumUnits = + E.second.isReserved() ? 
0U : E.second.NumUnits; + unsigned Index = getResourceStateIndex(E.first); + return Resources[Index]->isReady(NumUnits); + }); +} + +// Returns true if all resources are in-order, and there is at least one +// resource which is a dispatch hazard (BufferSize = 0). +bool ResourceManager::mustIssueImmediately(const InstrDesc &Desc) const { + if (!canBeIssued(Desc)) + return false; + bool AllInOrderResources = all_of(Desc.Buffers, [&](uint64_t BufferMask) { + unsigned Index = getResourceStateIndex(BufferMask); + const ResourceState &Resource = *Resources[Index]; + return Resource.isInOrder() || Resource.isADispatchHazard(); + }); + if (!AllInOrderResources) + return false; + + return any_of(Desc.Buffers, [&](uint64_t BufferMask) { + return Resources[getResourceStateIndex(BufferMask)]->isADispatchHazard(); + }); +} + +void ResourceManager::issueInstruction( + const InstrDesc &Desc, + SmallVectorImpl> &Pipes) { + for (const std::pair &R : Desc.Resources) { + const CycleSegment &CS = R.second.CS; + if (!CS.size()) { + releaseResource(R.first); + continue; + } + + assert(CS.begin() == 0 && "Invalid {Start, End} cycles!"); + if (!R.second.isReserved()) { + ResourceRef Pipe = selectPipe(R.first); + use(Pipe); + BusyResources[Pipe] += CS.size(); + // Replace the resource mask with a valid processor resource index. + const ResourceState &RS = *Resources[getResourceStateIndex(Pipe.first)]; + Pipe.first = RS.getProcResourceID(); + Pipes.emplace_back( + std::pair(Pipe, static_cast(CS.size()))); + } else { + assert((countPopulation(R.first) > 1) && "Expected a group!"); + // Mark this group as reserved. + assert(R.second.isReserved()); + reserveResource(R.first); + BusyResources[ResourceRef(R.first, R.first)] += CS.size(); + } + } +} + +void ResourceManager::cycleEvent(SmallVectorImpl &ResourcesFreed) { + for (std::pair &BR : BusyResources) { + if (BR.second) + BR.second--; + if (!BR.second) { + // Release this resource. 
+ const ResourceRef &RR = BR.first; + + if (countPopulation(RR.first) == 1) + release(RR); + + releaseResource(RR.first); + ResourcesFreed.push_back(RR); + } + } + + for (const ResourceRef &RF : ResourcesFreed) + BusyResources.erase(RF); +} + +void ResourceManager::reserveResource(uint64_t ResourceID) { + ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; + assert(!Resource.isReserved()); + Resource.setReserved(); +} + +void ResourceManager::releaseResource(uint64_t ResourceID) { + ResourceState &Resource = *Resources[getResourceStateIndex(ResourceID)]; + Resource.clearReserved(); +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/RetireControlUnit.cpp @@ -0,0 +1,87 @@ +//===---------------------- RetireControlUnit.cpp ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file simulates the hardware responsible for retiring instructions. +/// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/RetireControlUnit.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM) + : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), + AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0) { + // Check if the scheduling model provides extra information about the machine + // processor. 
If so, then use that information to set the reorder buffer size + // and the maximum number of instructions retired per cycle. + if (SM.hasExtraProcessorInfo()) { + const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); + if (EPI.ReorderBufferSize) + AvailableSlots = EPI.ReorderBufferSize; + MaxRetirePerCycle = EPI.MaxRetirePerCycle; + } + + assert(AvailableSlots && "Invalid reorder buffer size!"); + Queue.resize(AvailableSlots); +} + +// Reserves a number of slots, and returns a new token. +unsigned RetireControlUnit::reserveSlot(const InstRef &IR, + unsigned NumMicroOps) { + assert(isAvailable(NumMicroOps)); + unsigned NormalizedQuantity = + std::min(NumMicroOps, static_cast(Queue.size())); + // Zero latency instructions may have zero mOps. Artificially bump this + // value to 1. Although zero latency instructions don't consume scheduler + // resources, they still consume one slot in the retire queue. + NormalizedQuantity = std::max(NormalizedQuantity, 1U); + unsigned TokenID = NextAvailableSlotIdx; + Queue[NextAvailableSlotIdx] = {IR, NormalizedQuantity, false}; + NextAvailableSlotIdx += NormalizedQuantity; + NextAvailableSlotIdx %= Queue.size(); + AvailableSlots -= NormalizedQuantity; + return TokenID; +} + +const RetireControlUnit::RUToken &RetireControlUnit::peekCurrentToken() const { + return Queue[CurrentInstructionSlotIdx]; +} + +void RetireControlUnit::consumeCurrentToken() { + const RetireControlUnit::RUToken &Current = peekCurrentToken(); + assert(Current.NumSlots && "Reserved zero slots?"); + assert(Current.IR.isValid() && "Invalid RUToken in the RCU queue."); + + // Update the slot index to be the next item in the circular queue. 
+ CurrentInstructionSlotIdx += Current.NumSlots; + CurrentInstructionSlotIdx %= Queue.size(); + AvailableSlots += Current.NumSlots; +} + +void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { + assert(Queue.size() > TokenID); + assert(Queue[TokenID].Executed == false && Queue[TokenID].IR.isValid()); + Queue[TokenID].Executed = true; +} + +#ifndef NDEBUG +void RetireControlUnit::dump() const { + dbgs() << "Retire Unit: { Total Slots=" << Queue.size() + << ", Available Slots=" << AvailableSlots << " }\n"; +} +#endif + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp +++ llvm/trunk/tools/llvm-mca/lib/HardwareUnits/Scheduler.cpp @@ -0,0 +1,244 @@ +//===--------------------- Scheduler.cpp ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A scheduler for processor resource units and processor resource groups. +// +//===----------------------------------------------------------------------===// + +#include "HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace mca { + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +void Scheduler::initializeStrategy(std::unique_ptr S) { + // Ensure we have a valid (non-null) strategy object. + Strategy = S ? std::move(S) : llvm::make_unique(); +} + +// Anchor the vtable of SchedulerStrategy and DefaultSchedulerStrategy. 
+SchedulerStrategy::~SchedulerStrategy() = default; +DefaultSchedulerStrategy::~DefaultSchedulerStrategy() = default; + +#ifndef NDEBUG +void Scheduler::dump() const { + dbgs() << "[SCHEDULER]: WaitSet size is: " << WaitSet.size() << '\n'; + dbgs() << "[SCHEDULER]: ReadySet size is: " << ReadySet.size() << '\n'; + dbgs() << "[SCHEDULER]: IssuedSet size is: " << IssuedSet.size() << '\n'; + Resources->dump(); +} +#endif + +Scheduler::Status Scheduler::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + + switch (Resources->canBeDispatched(Desc.Buffers)) { + case ResourceStateEvent::RS_BUFFER_UNAVAILABLE: + return Scheduler::SC_BUFFERS_FULL; + case ResourceStateEvent::RS_RESERVED: + return Scheduler::SC_DISPATCH_GROUP_STALL; + case ResourceStateEvent::RS_BUFFER_AVAILABLE: + break; + } + + // Give lower priority to LSUnit stall events. + switch (LSU->isAvailable(IR)) { + case LSUnit::LSU_LQUEUE_FULL: + return Scheduler::SC_LOAD_QUEUE_FULL; + case LSUnit::LSU_SQUEUE_FULL: + return Scheduler::SC_STORE_QUEUE_FULL; + case LSUnit::LSU_AVAILABLE: + return Scheduler::SC_AVAILABLE; + } + + llvm_unreachable("Don't know how to process this LSU state result!"); +} + +void Scheduler::issueInstructionImpl( + InstRef &IR, + SmallVectorImpl> &UsedResources) { + Instruction *IS = IR.getInstruction(); + const InstrDesc &D = IS->getDesc(); + + // Issue the instruction and collect all the consumed resources + // into a vector. That vector is then used to notify the listener. + Resources->issueInstruction(D, UsedResources); + + // Notify the instruction that it started executing. + // This updates the internal state of each write. + IS->execute(); + + if (IS->isExecuting()) + IssuedSet.emplace_back(IR); + else if (IS->isExecuted()) + LSU->onInstructionExecuted(IR); +} + +// Release the buffered resources and issue the instruction. 
+void Scheduler::issueInstruction( + InstRef &IR, SmallVectorImpl> &UsedResources, + SmallVectorImpl &ReadyInstructions) { + const Instruction &Inst = *IR.getInstruction(); + bool HasDependentUsers = Inst.hasDependentUsers(); + + Resources->releaseBuffers(Inst.getDesc().Buffers); + issueInstructionImpl(IR, UsedResources); + // Instructions that have been issued during this cycle might have unblocked + // other dependent instructions. Dependent instructions may be issued during + // this same cycle if operands have ReadAdvance entries. Promote those + // instructions to the ReadySet and notify the caller that those are ready. + if (HasDependentUsers) + promoteToReadySet(ReadyInstructions); +} + +void Scheduler::promoteToReadySet(SmallVectorImpl &Ready) { + // Scan the set of waiting instructions and promote them to the + // ready queue if operands are all ready. + unsigned RemovedElements = 0; + for (auto I = WaitSet.begin(), E = WaitSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR.isValid()) + break; + + // Check if this instruction is now ready. In case, force + // a transition in state using method 'update()'. + Instruction &IS = *IR.getInstruction(); + if (!IS.isReady()) + IS.update(); + + // Check if there are still unsolved data dependencies. + if (!isReady(IR)) { + ++I; + continue; + } + + Ready.emplace_back(IR); + ReadySet.emplace_back(IR); + + IR.invalidate(); + ++RemovedElements; + std::iter_swap(I, E - RemovedElements); + } + + WaitSet.resize(WaitSet.size() - RemovedElements); +} + +InstRef Scheduler::select() { + unsigned QueueIndex = ReadySet.size(); + for (unsigned I = 0, E = ReadySet.size(); I != E; ++I) { + const InstRef &IR = ReadySet[I]; + if (QueueIndex == ReadySet.size() || + Strategy->compare(IR, ReadySet[QueueIndex])) { + const InstrDesc &D = IR.getInstruction()->getDesc(); + if (Resources->canBeIssued(D)) + QueueIndex = I; + } + } + + if (QueueIndex == ReadySet.size()) + return InstRef(); + + // We found an instruction to issue. 
+ InstRef IR = ReadySet[QueueIndex]; + std::swap(ReadySet[QueueIndex], ReadySet[ReadySet.size() - 1]); + ReadySet.pop_back(); + return IR; +} + +void Scheduler::updateIssuedSet(SmallVectorImpl &Executed) { + unsigned RemovedElements = 0; + for (auto I = IssuedSet.begin(), E = IssuedSet.end(); I != E;) { + InstRef &IR = *I; + if (!IR.isValid()) + break; + Instruction &IS = *IR.getInstruction(); + if (!IS.isExecuted()) { + LLVM_DEBUG(dbgs() << "[SCHEDULER]: Instruction #" << IR + << " is still executing.\n"); + ++I; + continue; + } + + // Instruction IR has completed execution. + LSU->onInstructionExecuted(IR); + Executed.emplace_back(IR); + ++RemovedElements; + IR.invalidate(); + std::iter_swap(I, E - RemovedElements); + } + + IssuedSet.resize(IssuedSet.size() - RemovedElements); +} + +void Scheduler::cycleEvent(SmallVectorImpl &Freed, + SmallVectorImpl &Executed, + SmallVectorImpl &Ready) { + // Release consumed resources. + Resources->cycleEvent(Freed); + + // Propagate the cycle event to the 'Issued' and 'Wait' sets. + for (InstRef &IR : IssuedSet) + IR.getInstruction()->cycleEvent(); + + updateIssuedSet(Executed); + + for (InstRef &IR : WaitSet) + IR.getInstruction()->cycleEvent(); + + promoteToReadySet(Ready); +} + +bool Scheduler::mustIssueImmediately(const InstRef &IR) const { + // Instructions that use an in-order dispatch/issue processor resource must be + // issued immediately to the pipeline(s). Any other in-order buffered + // resources (i.e. BufferSize=1) is consumed. + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + return Desc.isZeroLatency() || Resources->mustIssueImmediately(Desc); +} + +void Scheduler::dispatch(const InstRef &IR) { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + Resources->reserveBuffers(Desc.Buffers); + + // If necessary, reserve queue entries in the load-store unit (LSU). 
+ bool IsMemOp = Desc.MayLoad || Desc.MayStore; + if (IsMemOp) + LSU->dispatch(IR); + + if (!isReady(IR)) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the WaitSet\n"); + WaitSet.push_back(IR); + return; + } + + // Don't add a zero-latency instruction to the Ready queue. + // A zero-latency instruction doesn't consume any scheduler resources. That is + // because it doesn't need to be executed, and it is often removed at register + // renaming stage. For example, register-register moves are often optimized at + // register renaming stage by simply updating register aliases. On some + // targets, zero-idiom instructions (for example: a xor that clears the value + // of a register) are treated specially, and are often eliminated at register + // renaming stage. + if (!mustIssueImmediately(IR)) { + LLVM_DEBUG(dbgs() << "[SCHEDULER] Adding #" << IR << " to the ReadySet\n"); + ReadySet.push_back(IR); + } +} + +bool Scheduler::isReady(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + bool IsMemOp = Desc.MayLoad || Desc.MayStore; + return IR.getInstruction()->isReady() && (!IsMemOp || LSU->isReady(IR)); +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp +++ llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp @@ -0,0 +1,485 @@ +//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the InstrBuilder interface. 
+/// +//===----------------------------------------------------------------------===// + +#include "InstrBuilder.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/WithColor.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +using namespace llvm; + +static void initializeUsedResources(InstrDesc &ID, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI, + ArrayRef ProcResourceMasks) { + const MCSchedModel &SM = STI.getSchedModel(); + + // Populate resources consumed. + using ResourcePlusCycles = std::pair; + std::vector Worklist; + + // Track cycles contributed by resources that are in a "Super" relationship. + // This is required if we want to correctly match the behavior of method + // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set + // of "consumed" processor resources and resource cycles, the logic in + // ExpandProcResource() doesn't update the number of resource cycles + // contributed by a "Super" resource to a group. + // We need to take this into account when we find that a processor resource is + // part of a group, and it is also used as the "Super" of other resources. + // This map stores the number of cycles contributed by sub-resources that are + // part of a "Super" resource. The key value is the "Super" resource mask ID. 
+ DenseMap SuperResources; + + for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) { + const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I; + const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx); + uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx]; + if (PR.BufferSize != -1) + ID.Buffers.push_back(Mask); + CycleSegment RCy(0, PRE->Cycles, false); + Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy))); + if (PR.SuperIdx) { + uint64_t Super = ProcResourceMasks[PR.SuperIdx]; + SuperResources[Super] += PRE->Cycles; + } + } + + // Sort elements by mask popcount, so that we prioritize resource units over + // resource groups, and smaller groups over larger groups. + llvm::sort(Worklist.begin(), Worklist.end(), + [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) { + unsigned popcntA = countPopulation(A.first); + unsigned popcntB = countPopulation(B.first); + if (popcntA < popcntB) + return true; + if (popcntA > popcntB) + return false; + return A.first < B.first; + }); + + uint64_t UsedResourceUnits = 0; + + // Remove cycles contributed by smaller resources. + for (unsigned I = 0, E = Worklist.size(); I < E; ++I) { + ResourcePlusCycles &A = Worklist[I]; + if (!A.second.size()) { + A.second.NumUnits = 0; + A.second.setReserved(); + ID.Resources.emplace_back(A); + continue; + } + + ID.Resources.emplace_back(A); + uint64_t NormalizedMask = A.first; + if (countPopulation(A.first) == 1) { + UsedResourceUnits |= A.first; + } else { + // Remove the leading 1 from the resource group mask. 
+ NormalizedMask ^= PowerOf2Floor(NormalizedMask); + } + + for (unsigned J = I + 1; J < E; ++J) { + ResourcePlusCycles &B = Worklist[J]; + if ((NormalizedMask & B.first) == NormalizedMask) { + B.second.CS.Subtract(A.second.size() - SuperResources[A.first]); + if (countPopulation(B.first) > 1) + B.second.NumUnits++; + } + } + } + + // A SchedWrite may specify a number of cycles in which a resource group + // is reserved. For example (on target x86; cpu Haswell): + // + // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> { + // let ResourceCycles = [2, 2, 3]; + // } + // + // This means: + // Resource units HWPort0 and HWPort1 are both used for 2cy. + // Resource group HWPort01 is the union of HWPort0 and HWPort1. + // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01 + // will not be usable for 2 entire cycles from instruction issue. + // + // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency + // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an + // extra delay on top of the 2 cycles latency. + // During those extra cycles, HWPort01 is not usable by other instructions. + for (ResourcePlusCycles &RPC : ID.Resources) { + if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) { + // Remove the leading 1 from the resource group mask. + uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first); + if ((Mask & UsedResourceUnits) == Mask) + RPC.second.setReserved(); + } + } + + LLVM_DEBUG({ + for (const std::pair &R : ID.Resources) + dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n'; + for (const uint64_t R : ID.Buffers) + dbgs() << "\t\tBuffer Mask=" << R << '\n'; + }); +} + +static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, + const MCSchedClassDesc &SCDesc, + const MCSubtargetInfo &STI) { + if (MCDesc.isCall()) { + // We cannot estimate how long this call will take. + // Artificially set an arbitrarily high latency (100cy). 
+ ID.MaxLatency = 100U; + return; + } + + int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); + // If latency is unknown, then conservatively assume a MaxLatency of 100cy. + ID.MaxLatency = Latency < 0 ? 100U : static_cast(Latency); +} + +Error InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + + // These are for now the (strong) assumptions made by this algorithm: + // * The number of explicit and implicit register definitions in a MCInst + // matches the number of explicit and implicit definitions according to + // the opcode descriptor (MCInstrDesc). + // * Register definitions take precedence over register uses in the operands + // list. + // * If an opcode specifies an optional definition, then the optional + // definition is always the last operand in the sequence, and it can be + // set to zero (i.e. "no register"). + // + // These assumptions work quite well for most out-of-order in-tree targets + // like x86. This is mainly because the vast majority of instructions is + // expanded to MCInst using a straightforward lowering logic that preserves + // the ordering of the operands. + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs(); + unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries; + unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs; + if (MCDesc.hasOptionalDef()) + TotalDefs++; + ID.Writes.resize(TotalDefs); + // Iterate over the operands list, and skip non-register operands. + // The first NumExplictDefs register operands are expected to be register + // definitions. 
+ unsigned CurrentDef = 0; + unsigned i = 0; + for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) { + const MCOperand &Op = MCI.getOperand(i); + if (!Op.isReg()) + continue; + + WriteDescriptor &Write = ID.Writes[CurrentDef]; + Write.OpIndex = i; + if (CurrentDef < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, CurrentDef); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + Write.IsOptionalDef = false; + LLVM_DEBUG({ + dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + CurrentDef++; + } + + if (CurrentDef != NumExplicitDefs) { + return make_error( + "error: Expected more register operand definitions.", + inconvertibleErrorCode()); + } + + CurrentDef = 0; + for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) { + unsigned Index = NumExplicitDefs + CurrentDef; + WriteDescriptor &Write = ID.Writes[Index]; + Write.OpIndex = ~CurrentDef; + Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef]; + if (Index < NumWriteLatencyEntries) { + const MCWriteLatencyEntry &WLE = + *STI.getWriteLatencyEntry(&SCDesc, Index); + // Conservatively default to MaxLatency. + Write.Latency = + WLE.Cycles < 0 ? ID.MaxLatency : static_cast(WLE.Cycles); + Write.SClassOrWriteResourceID = WLE.WriteResourceID; + } else { + // Assign a default latency for this write. 
+ Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + } + + Write.IsOptionalDef = false; + assert(Write.RegisterID != 0 && "Expected a valid phys register!"); + LLVM_DEBUG({ + dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex + << ", PhysReg=" << MRI.getName(Write.RegisterID) + << ", Latency=" << Write.Latency + << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n'; + }); + } + + if (MCDesc.hasOptionalDef()) { + // Always assume that the optional definition is the last operand of the + // MCInst sequence. + const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1); + if (i == MCI.getNumOperands() || !Op.isReg()) + return make_error( + "error: expected a register operand for an optional " + "definition. Instruction has not be correctly analyzed.", + inconvertibleErrorCode()); + + WriteDescriptor &Write = ID.Writes[TotalDefs - 1]; + Write.OpIndex = MCI.getNumOperands() - 1; + // Assign a default latency for this write. + Write.Latency = ID.MaxLatency; + Write.SClassOrWriteResourceID = 0; + Write.IsOptionalDef = true; + } + + return ErrorSuccess(); +} + +Error InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI, + unsigned SchedClassID) { + const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode()); + unsigned NumExplicitDefs = MCDesc.getNumDefs(); + + // Skip explicit definitions. + unsigned i = 0; + for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) { + const MCOperand &Op = MCI.getOperand(i); + if (Op.isReg()) + NumExplicitDefs--; + } + + if (NumExplicitDefs) { + return make_error( + "error: Expected more register operand definitions. 
", + inconvertibleErrorCode()); + } + + unsigned NumExplicitUses = MCI.getNumOperands() - i; + unsigned NumImplicitUses = MCDesc.getNumImplicitUses(); + if (MCDesc.hasOptionalDef()) { + assert(NumExplicitUses); + NumExplicitUses--; + } + unsigned TotalUses = NumExplicitUses + NumImplicitUses; + if (!TotalUses) + return ErrorSuccess(); + + ID.Reads.resize(TotalUses); + for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) { + ReadDescriptor &Read = ID.Reads[CurrentUse]; + Read.OpIndex = i + CurrentUse; + Read.UseIndex = CurrentUse; + Read.SchedClassID = SchedClassID; + LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex + << ", UseIndex=" << Read.UseIndex << '\n'); + } + + for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) { + ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse]; + Read.OpIndex = ~CurrentUse; + Read.UseIndex = NumExplicitUses + CurrentUse; + Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse]; + Read.SchedClassID = SchedClassID; + LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID=" + << MRI.getName(Read.RegisterID) << '\n'); + } + return ErrorSuccess(); +} + +Expected +InstrBuilder::createInstrDescImpl(const MCInst &MCI) { + assert(STI.getSchedModel().hasInstrSchedModel() && + "Itineraries are not yet supported!"); + + // Obtain the instruction descriptor from the opcode. + unsigned short Opcode = MCI.getOpcode(); + const MCInstrDesc &MCDesc = MCII.get(Opcode); + const MCSchedModel &SM = STI.getSchedModel(); + + // Then obtain the scheduling class information from the instruction. + unsigned SchedClassID = MCDesc.getSchedClass(); + unsigned CPUID = SM.getProcessorID(); + + // Try to solve variant scheduling classes. 
+ if (SchedClassID) { + while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant()) + SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID); + + if (!SchedClassID) { + return make_error("unable to resolve this variant class.", + inconvertibleErrorCode()); + } + } + + // Check if this instruction is supported. Otherwise, report an error. + const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID); + if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) { + std::string ToString; + llvm::raw_string_ostream OS(ToString); + WithColor::error() << "found an unsupported instruction in the input" + << " assembly sequence.\n"; + MCIP.printInst(&MCI, OS, "", STI); + OS.flush(); + WithColor::note() << "instruction: " << ToString << '\n'; + return make_error( + "Don't know how to analyze unsupported instructions", + inconvertibleErrorCode()); + } + + // Create a new empty descriptor. + std::unique_ptr ID = llvm::make_unique(); + ID->NumMicroOps = SCDesc.NumMicroOps; + + if (MCDesc.isCall()) { + // We don't correctly model calls. + WithColor::warning() << "found a call in the input assembly sequence.\n"; + WithColor::note() << "call instructions are not correctly modeled. 
" + << "Assume a latency of 100cy.\n"; + } + + if (MCDesc.isReturn()) { + WithColor::warning() << "found a return instruction in the input" + << " assembly sequence.\n"; + WithColor::note() << "program counter updates are ignored.\n"; + } + + ID->MayLoad = MCDesc.mayLoad(); + ID->MayStore = MCDesc.mayStore(); + ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects(); + + initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); + computeMaxLatency(*ID, MCDesc, SCDesc, STI); + if (auto Err = populateWrites(*ID, MCI, SchedClassID)) + return std::move(Err); + if (auto Err = populateReads(*ID, MCI, SchedClassID)) + return std::move(Err); + + LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n'); + LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n'); + + // Now add the new descriptor. + SchedClassID = MCDesc.getSchedClass(); + if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) { + Descriptors[MCI.getOpcode()] = std::move(ID); + return *Descriptors[MCI.getOpcode()]; + } + + VariantDescriptors[&MCI] = std::move(ID); + return *VariantDescriptors[&MCI]; +} + +Expected +InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { + if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) + return *Descriptors[MCI.getOpcode()]; + + if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) + return *VariantDescriptors[&MCI]; + + return createInstrDescImpl(MCI); +} + +Expected> +InstrBuilder::createInstruction(const MCInst &MCI) { + Expected DescOrErr = getOrCreateInstrDesc(MCI); + if (!DescOrErr) + return DescOrErr.takeError(); + const InstrDesc &D = *DescOrErr; + std::unique_ptr NewIS = llvm::make_unique(D); + + // Initialize Reads first. + for (const ReadDescriptor &RD : D.Reads) { + int RegID = -1; + if (!RD.isImplicitRead()) { + // explicit read. + const MCOperand &Op = MCI.getOperand(RD.OpIndex); + // Skip non-register operands. + if (!Op.isReg()) + continue; + RegID = Op.getReg(); + } else { + // Implicit read. 
+ RegID = RD.RegisterID; + } + + // Skip invalid register operands. + if (!RegID) + continue; + + // Okay, this is a register operand. Create a ReadState for it. + assert(RegID > 0 && "Invalid register ID found!"); + NewIS->getUses().emplace_back(llvm::make_unique(RD, RegID)); + } + + // Early exit if there are no writes. + if (D.Writes.empty()) + return std::move(NewIS); + + // Track register writes that implicitly clear the upper portion of the + // underlying super-registers using an APInt. + APInt WriteMask(D.Writes.size(), 0); + + // Now query the MCInstrAnalysis object to obtain information about which + // register writes implicitly clear the upper portion of a super-register. + MCIA.clearsSuperRegisters(MRI, MCI, WriteMask); + + // Check if this is a dependency breaking instruction. + if (MCIA.isDependencyBreaking(STI, MCI)) + NewIS->setDependencyBreaking(); + + // Initialize writes. + unsigned WriteIndex = 0; + for (const WriteDescriptor &WD : D.Writes) { + unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID + : MCI.getOperand(WD.OpIndex).getReg(); + // Check if this is a optional definition that references NoReg. + if (WD.IsOptionalDef && !RegID) { + ++WriteIndex; + continue; + } + + assert(RegID && "Expected a valid register ID!"); + NewIS->getDefs().emplace_back(llvm::make_unique( + WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex])); + ++WriteIndex; + } + + return std::move(NewIS); +} +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/Instruction.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Instruction.cpp +++ llvm/trunk/tools/llvm-mca/lib/Instruction.cpp @@ -0,0 +1,177 @@ +//===--------------------- Instruction.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines abstractions used by the Pipeline to model register reads, +// register writes and instructions. +// +//===----------------------------------------------------------------------===// + +#include "Instruction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace mca { + +using namespace llvm; + +void ReadState::writeStartEvent(unsigned Cycles) { + assert(DependentWrites); + assert(CyclesLeft == UNKNOWN_CYCLES); + + // This read may be dependent on more than one write. This typically occurs + // when a definition is the result of multiple writes where at least one + // write does a partial register update. + // The HW is forced to do some extra bookkeeping to track of all the + // dependent writes, and implement a merging scheme for the partial writes. + --DependentWrites; + TotalCycles = std::max(TotalCycles, Cycles); + + if (!DependentWrites) { + CyclesLeft = TotalCycles; + IsReady = !CyclesLeft; + } +} + +void WriteState::onInstructionIssued() { + assert(CyclesLeft == UNKNOWN_CYCLES); + // Update the number of cycles left based on the WriteDescriptor info. + CyclesLeft = getLatency(); + + // Now that the time left before write-back is known, notify + // all the users. + for (const std::pair &User : Users) { + ReadState *RS = User.first; + unsigned ReadCycles = std::max(0, CyclesLeft - User.second); + RS->writeStartEvent(ReadCycles); + } +} + +void WriteState::addUser(ReadState *User, int ReadAdvance) { + // If CyclesLeft is different than -1, then we don't need to + // update the list of users. We can just notify the user with + // the actual number of cycles left (which may be zero). 
+ if (CyclesLeft != UNKNOWN_CYCLES) { + unsigned ReadCycles = std::max(0, CyclesLeft - ReadAdvance); + User->writeStartEvent(ReadCycles); + return; + } + + std::pair NewPair(User, ReadAdvance); + Users.insert(NewPair); +} + +void WriteState::cycleEvent() { + // Note: CyclesLeft can be a negative number. It is an error to + // make it an unsigned quantity because users of this write may + // specify a negative ReadAdvance. + if (CyclesLeft != UNKNOWN_CYCLES) + CyclesLeft--; +} + +void ReadState::cycleEvent() { + // Update the total number of cycles. + if (DependentWrites && TotalCycles) { + --TotalCycles; + return; + } + + // Bail out immediately if we don't know how many cycles are left. + if (CyclesLeft == UNKNOWN_CYCLES) + return; + + if (CyclesLeft) { + --CyclesLeft; + IsReady = !CyclesLeft; + } +} + +#ifndef NDEBUG +void WriteState::dump() const { + dbgs() << "{ OpIdx=" << WD.OpIndex << ", Lat=" << getLatency() << ", RegID " + << getRegisterID() << ", Cycles Left=" << getCyclesLeft() << " }"; +} + +void WriteRef::dump() const { + dbgs() << "IID=" << getSourceIndex() << ' '; + if (isValid()) + getWriteState()->dump(); + else + dbgs() << "(null)"; +} +#endif + +void Instruction::dispatch(unsigned RCUToken) { + assert(Stage == IS_INVALID); + Stage = IS_AVAILABLE; + RCUTokenID = RCUToken; + + // Check if input operands are already available. + update(); +} + +void Instruction::execute() { + assert(Stage == IS_READY); + Stage = IS_EXECUTING; + + // Set the cycles left before the write-back stage. + CyclesLeft = Desc.MaxLatency; + + for (UniqueDef &Def : Defs) + Def->onInstructionIssued(); + + // Transition to the "executed" stage if this is a zero-latency instruction. 
+ if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +void Instruction::update() { + assert(isDispatched() && "Unexpected instruction stage found!"); + + if (!llvm::all_of(Uses, [](const UniqueUse &Use) { return Use->isReady(); })) + return; + + // A partial register write cannot complete before a dependent write. + auto IsDefReady = [&](const UniqueDef &Def) { + if (const WriteState *Write = Def->getDependentWrite()) { + int WriteLatency = Write->getCyclesLeft(); + if (WriteLatency == UNKNOWN_CYCLES) + return false; + return static_cast(WriteLatency) < Desc.MaxLatency; + } + return true; + }; + + if (llvm::all_of(Defs, IsDefReady)) + Stage = IS_READY; +} + +void Instruction::cycleEvent() { + if (isReady()) + return; + + if (isDispatched()) { + for (UniqueUse &Use : Uses) + Use->cycleEvent(); + + update(); + return; + } + + assert(isExecuting() && "Instruction not in-flight?"); + assert(CyclesLeft && "Instruction already executed?"); + for (UniqueDef &Def : Defs) + Def->cycleEvent(); + CyclesLeft--; + if (!CyclesLeft) + Stage = IS_EXECUTED; +} + +const unsigned WriteRef::INVALID_IID = std::numeric_limits::max(); + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/LLVMBuild.txt =================================================================== --- llvm/trunk/tools/llvm-mca/lib/LLVMBuild.txt +++ llvm/trunk/tools/llvm-mca/lib/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/llvm-mca/lib/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MCA +parent = Libraries +required_libraries = CodeGen MC Support Index: llvm/trunk/tools/llvm-mca/lib/Pipeline.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Pipeline.cpp +++ llvm/trunk/tools/llvm-mca/lib/Pipeline.cpp @@ -0,0 +1,97 @@ +//===--------------------- Pipeline.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements an ordered container of stages that simulate the +/// pipeline of a hardware backend. +/// +//===----------------------------------------------------------------------===// + +#include "Pipeline.h" +#include "HWEventListener.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/Debug.h" + +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +using namespace llvm; + +void Pipeline::addEventListener(HWEventListener *Listener) { + if (Listener) + Listeners.insert(Listener); + for (auto &S : Stages) + S->addListener(Listener); +} + +bool Pipeline::hasWorkToProcess() { + return llvm::any_of(Stages, [](const std::unique_ptr &S) { + return S->hasWorkToComplete(); + }); +} + +llvm::Error Pipeline::run() { + assert(!Stages.empty() && "Unexpected empty pipeline found!"); + + while (hasWorkToProcess()) { + notifyCycleBegin(); + if (llvm::Error Err = runCycle()) + return Err; + notifyCycleEnd(); + ++Cycles; + } + return llvm::ErrorSuccess(); +} + +llvm::Error Pipeline::runCycle() { + llvm::Error Err = llvm::ErrorSuccess(); + // Update stages before we start processing new 
instructions. + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { + const std::unique_ptr &S = *I; + Err = S->cycleStart(); + } + + // Now fetch and execute new instructions. + InstRef IR; + Stage &FirstStage = *Stages[0]; + while (!Err && FirstStage.isAvailable(IR)) + Err = FirstStage.execute(IR); + + // Update stages in preparation for a new cycle. + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { + const std::unique_ptr &S = *I; + Err = S->cycleEnd(); + } + + return Err; +} + +void Pipeline::appendStage(std::unique_ptr S) { + assert(S && "Invalid null stage in input!"); + if (!Stages.empty()) { + Stage *Last = Stages.back().get(); + Last->setNextInSequence(S.get()); + } + + Stages.push_back(std::move(S)); +} + +void Pipeline::notifyCycleBegin() { + LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); + for (HWEventListener *Listener : Listeners) + Listener->onCycleBegin(); +} + +void Pipeline::notifyCycleEnd() { + LLVM_DEBUG(dbgs() << "[E] Cycle end: " << Cycles << "\n\n"); + for (HWEventListener *Listener : Listeners) + Listener->onCycleEnd(); +} +} // namespace mca. Index: llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/DispatchStage.cpp @@ -0,0 +1,160 @@ +//===--------------------- DispatchStage.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file models the dispatch component of an instruction pipeline. 
+/// +/// The DispatchStage is responsible for updating instruction dependencies +/// and communicating to the simulated instruction scheduler that an instruction +/// is ready to be scheduled for execution. +/// +//===----------------------------------------------------------------------===// + +#include "Stages/DispatchStage.h" +#include "HWEventListener.h" +#include "HardwareUnits/Scheduler.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +void DispatchStage::notifyInstructionDispatched(const InstRef &IR, + ArrayRef UsedRegs) { + LLVM_DEBUG(dbgs() << "[E] Instruction Dispatched: #" << IR << '\n'); + notifyEvent(HWInstructionDispatchedEvent(IR, UsedRegs)); +} + +bool DispatchStage::checkPRF(const InstRef &IR) const { + SmallVector RegDefs; + for (const std::unique_ptr &RegDef : + IR.getInstruction()->getDefs()) + RegDefs.emplace_back(RegDef->getRegisterID()); + + const unsigned RegisterMask = PRF.isAvailable(RegDefs); + // A mask with all zeroes means: register files are available. + if (RegisterMask) { + notifyEvent( + HWStallEvent(HWStallEvent::RegisterFileStall, IR)); + return false; + } + + return true; +} + +bool DispatchStage::checkRCU(const InstRef &IR) const { + const unsigned NumMicroOps = IR.getInstruction()->getDesc().NumMicroOps; + if (RCU.isAvailable(NumMicroOps)) + return true; + notifyEvent( + HWStallEvent(HWStallEvent::RetireControlUnitStall, IR)); + return false; +} + +bool DispatchStage::canDispatch(const InstRef &IR) const { + return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR); +} + +void DispatchStage::updateRAWDependencies(ReadState &RS, + const MCSubtargetInfo &STI) { + SmallVector DependentWrites; + + collectWrites(DependentWrites, RS.getRegisterID()); + RS.setDependentWrites(DependentWrites.size()); + // We know that this read depends on all the writes in DependentWrites. 
+ // For each write, check if we have ReadAdvance information, and use it + // to figure out in how many cycles this read becomes available. + const ReadDescriptor &RD = RS.getDescriptor(); + const MCSchedModel &SM = STI.getSchedModel(); + const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); + for (WriteRef &WR : DependentWrites) { + WriteState &WS = *WR.getWriteState(); + unsigned WriteResID = WS.getWriteResourceID(); + int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); + WS.addUser(&RS, ReadAdvance); + } +} + +llvm::Error DispatchStage::dispatch(InstRef IR) { + assert(!CarryOver && "Cannot dispatch another instruction!"); + Instruction &IS = *IR.getInstruction(); + const InstrDesc &Desc = IS.getDesc(); + const unsigned NumMicroOps = Desc.NumMicroOps; + if (NumMicroOps > DispatchWidth) { + assert(AvailableEntries == DispatchWidth); + AvailableEntries = 0; + CarryOver = NumMicroOps - DispatchWidth; + } else { + assert(AvailableEntries >= NumMicroOps); + AvailableEntries -= NumMicroOps; + } + + // A dependency-breaking instruction doesn't have to wait on the register + // input operands, and it is often optimized at register renaming stage. + // Update RAW dependencies if this instruction is not a dependency-breaking + // instruction. A dependency-breaking instruction is a zero-latency + // instruction that doesn't consume hardware resources. + // An example of dependency-breaking instruction on X86 is a zero-idiom XOR. + bool IsDependencyBreaking = IS.isDependencyBreaking(); + for (std::unique_ptr &RS : IS.getUses()) + if (RS->isImplicitRead() || !IsDependencyBreaking) + updateRAWDependencies(*RS, STI); + + // By default, a dependency-breaking zero-latency instruction is expected to + // be optimized at register renaming stage. That means, no physical register + // is allocated to the instruction. 
+ bool ShouldAllocateRegisters = + !(Desc.isZeroLatency() && IsDependencyBreaking); + SmallVector RegisterFiles(PRF.getNumRegisterFiles()); + for (std::unique_ptr &WS : IS.getDefs()) { + PRF.addRegisterWrite(WriteRef(IR.first, WS.get()), RegisterFiles, + ShouldAllocateRegisters); + } + + // Reserve slots in the RCU, and notify the instruction that it has been + // dispatched to the schedulers for execution. + IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); + + // Notify listeners of the "instruction dispatched" event, + // and move IR to the next stage. + notifyInstructionDispatched(IR, RegisterFiles); + return moveToTheNextStage(IR); +} + +llvm::Error DispatchStage::cycleStart() { + AvailableEntries = CarryOver >= DispatchWidth ? 0 : DispatchWidth - CarryOver; + CarryOver = CarryOver >= DispatchWidth ? CarryOver - DispatchWidth : 0U; + return llvm::ErrorSuccess(); +} + +bool DispatchStage::isAvailable(const InstRef &IR) const { + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + if (Required > AvailableEntries) + return false; + // The dispatch logic doesn't internally buffer instructions. It only accepts + // instructions that can be successfully moved to the next stage during this + // same cycle. 
+ return canDispatch(IR); +} + +llvm::Error DispatchStage::execute(InstRef &IR) { + assert(canDispatch(IR) && "Cannot dispatch another instruction!"); + return dispatch(IR); +} + +#ifndef NDEBUG +void DispatchStage::dump() const { + PRF.dump(); + RCU.dump(); +} +#endif +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/Stages/ExecuteStage.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/ExecuteStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/ExecuteStage.cpp @@ -0,0 +1,195 @@ +//===---------------------- ExecuteStage.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the execution stage of an instruction pipeline. +/// +/// The ExecuteStage is responsible for managing the hardware scheduler +/// and issuing notifications that an instruction has been executed. 
+/// +//===----------------------------------------------------------------------===// + +#include "Stages/ExecuteStage.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-mca" + +namespace mca { + +using namespace llvm; + +HWStallEvent::GenericEventType toHWStallEventType(Scheduler::Status Status) { + switch (Status) { + case Scheduler::SC_LOAD_QUEUE_FULL: + return HWStallEvent::LoadQueueFull; + case Scheduler::SC_STORE_QUEUE_FULL: + return HWStallEvent::StoreQueueFull; + case Scheduler::SC_BUFFERS_FULL: + return HWStallEvent::SchedulerQueueFull; + case Scheduler::SC_DISPATCH_GROUP_STALL: + return HWStallEvent::DispatchGroupStall; + case Scheduler::SC_AVAILABLE: + return HWStallEvent::Invalid; + } + + llvm_unreachable("Don't know how to process this StallKind!"); +} + +bool ExecuteStage::isAvailable(const InstRef &IR) const { + if (Scheduler::Status S = HWS.isAvailable(IR)) { + HWStallEvent::GenericEventType ET = toHWStallEventType(S); + notifyEvent(HWStallEvent(ET, IR)); + return false; + } + + return true; +} + +Error ExecuteStage::issueInstruction(InstRef &IR) { + SmallVector, 4> Used; + SmallVector Ready; + HWS.issueInstruction(IR, Used, Ready); + + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + notifyReleasedBuffers(Desc.Buffers); + notifyInstructionIssued(IR, Used); + if (IR.getInstruction()->isExecuted()) { + notifyInstructionExecuted(IR); + //FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &I : Ready) + notifyInstructionReady(I); + return ErrorSuccess(); +} + +Error ExecuteStage::issueReadyInstructions() { + InstRef IR = HWS.select(); + while (IR.isValid()) { + if (Error Err = issueInstruction(IR)) + return Err; + + // Select the next instruction to issue. 
+ IR = HWS.select(); + } + + return ErrorSuccess(); +} + +Error ExecuteStage::cycleStart() { + llvm::SmallVector Freed; + llvm::SmallVector Executed; + llvm::SmallVector Ready; + + HWS.cycleEvent(Freed, Executed, Ready); + + for (const ResourceRef &RR : Freed) + notifyResourceAvailable(RR); + + for (InstRef &IR : Executed) { + notifyInstructionExecuted(IR); + //FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } + + for (const InstRef &IR : Ready) + notifyInstructionReady(IR); + + return issueReadyInstructions(); +} + +// Schedule the instruction for execution on the hardware. +Error ExecuteStage::execute(InstRef &IR) { + assert(isAvailable(IR) && "Scheduler is not available!"); + +#ifndef NDEBUG + // Ensure that the HWS has not stored this instruction in its queues. + HWS.sanityCheck(IR); +#endif + // Reserve a slot in each buffered resource. Also, mark units with + // BufferSize=0 as reserved. Resources with a buffer size of zero will only + // be released after MCIS is issued, and all the ResourceCycles for those + // units have been consumed. + const InstrDesc &Desc = IR.getInstruction()->getDesc(); + HWS.dispatch(IR); + notifyReservedBuffers(Desc.Buffers); + if (!HWS.isReady(IR)) + return ErrorSuccess(); + + // If we did not return early, then the scheduler is ready for execution. + notifyInstructionReady(IR); + + // If we cannot issue immediately, the HWS will add IR to its ready queue for + // execution later, so we must return early here. + if (!HWS.mustIssueImmediately(IR)) + return ErrorSuccess(); + + // Issue IR to the underlying pipelines. 
+ return issueInstruction(IR); +} + +void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) { + LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); + notifyEvent( + HWInstructionEvent(HWInstructionEvent::Executed, IR)); +} + +void ExecuteStage::notifyInstructionReady(const InstRef &IR) { + LLVM_DEBUG(dbgs() << "[E] Instruction Ready: #" << IR << '\n'); + notifyEvent( + HWInstructionEvent(HWInstructionEvent::Ready, IR)); +} + +void ExecuteStage::notifyResourceAvailable(const ResourceRef &RR) { + LLVM_DEBUG(dbgs() << "[E] Resource Available: [" << RR.first << '.' + << RR.second << "]\n"); + for (HWEventListener *Listener : getListeners()) + Listener->onResourceAvailable(RR); +} + +void ExecuteStage::notifyInstructionIssued( + const InstRef &IR, ArrayRef> Used) { + LLVM_DEBUG({ + dbgs() << "[E] Instruction Issued: #" << IR << '\n'; + for (const std::pair &Resource : Used) { + dbgs() << "[E] Resource Used: [" << Resource.first.first << '.' + << Resource.first.second << "], "; + dbgs() << "cycles: " << Resource.second << '\n'; + } + }); + notifyEvent(HWInstructionIssuedEvent(IR, Used)); +} + +void ExecuteStage::notifyReservedBuffers(ArrayRef Buffers) { + if (Buffers.empty()) + return; + + SmallVector BufferIDs(Buffers.begin(), Buffers.end()); + std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(), + [&](uint64_t Op) { return HWS.getResourceID(Op); }); + for (HWEventListener *Listener : getListeners()) + Listener->onReservedBuffers(BufferIDs); +} + +void ExecuteStage::notifyReleasedBuffers(ArrayRef Buffers) { + if (Buffers.empty()) + return; + + SmallVector BufferIDs(Buffers.begin(), Buffers.end()); + std::transform(Buffers.begin(), Buffers.end(), BufferIDs.begin(), + [&](uint64_t Op) { return HWS.getResourceID(Op); }); + for (HWEventListener *Listener : getListeners()) + Listener->onReleasedBuffers(BufferIDs); +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/Stages/FetchStage.cpp 
=================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/FetchStage.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/FetchStage.cpp @@ -0,0 +1,82 @@ +//===---------------------- FetchStage.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the Fetch stage of an instruction pipeline. Its sole +/// purpose in life is to produce instructions for the rest of the pipeline. +/// +//===----------------------------------------------------------------------===// + +#include "Stages/FetchStage.h" + +namespace mca { + +bool FetchStage::hasWorkToComplete() const { + return CurrentInstruction.get() || SM.hasNext(); +} + +bool FetchStage::isAvailable(const InstRef & /* unused */) const { + if (!CurrentInstruction) + return false; + assert(SM.hasNext() && "Unexpected internal state!"); + const SourceRef SR = SM.peekNext(); + InstRef IR(SR.first, CurrentInstruction.get()); + return checkNextStage(IR); +} + +llvm::Error FetchStage::getNextInstruction() { + assert(!CurrentInstruction && "There is already an instruction to process!"); + if (!SM.hasNext()) + return llvm::ErrorSuccess(); + const SourceRef SR = SM.peekNext(); + llvm::Expected> InstOrErr = + IB.createInstruction(*SR.second); + if (!InstOrErr) + return InstOrErr.takeError(); + CurrentInstruction = std::move(InstOrErr.get()); + return llvm::ErrorSuccess(); +} + +llvm::Error FetchStage::execute(InstRef & /*unused */) { + assert(CurrentInstruction && "There is no instruction to process!"); + const SourceRef SR = SM.peekNext(); + InstRef IR(SR.first, CurrentInstruction.get()); + assert(checkNextStage(IR) && "Invalid fetch!"); + + Instructions[IR.getSourceIndex()] = std::move(CurrentInstruction); + if 
(llvm::Error Val = moveToTheNextStage(IR)) + return Val; + + SM.updateNext(); + + // Move the program counter. + return getNextInstruction(); +} + +llvm::Error FetchStage::cycleStart() { + if (!CurrentInstruction && SM.hasNext()) + return getNextInstruction(); + return llvm::ErrorSuccess(); +} + +llvm::Error FetchStage::cycleEnd() { + // Find the first instruction which hasn't been retired. + const InstMap::iterator It = + llvm::find_if(Instructions, [](const InstMap::value_type &KeyValuePair) { + return !KeyValuePair.second->isRetired(); + }); + + // Erase instructions up to the first that hasn't been retired. + if (It != Instructions.begin()) + Instructions.erase(Instructions.begin(), It); + + return llvm::ErrorSuccess(); +} + +} // namespace mca Index: llvm/trunk/tools/llvm-mca/lib/Stages/InstructionTables.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/Stages/InstructionTables.cpp +++ llvm/trunk/tools/llvm-mca/lib/Stages/InstructionTables.cpp @@ -0,0 +1,70 @@ +//===--------------------- InstructionTables.cpp ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the method InstructionTables::execute(). +/// Method execute() prints a theoretical resource pressure distribution based +/// on the information available in the scheduling model, and without running +/// the pipeline. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "Stages/InstructionTables.h"
+
+namespace mca {
+
+using namespace llvm;
+
+Error InstructionTables::execute(InstRef &IR) {
+  ArrayRef<uint64_t> Masks = IB.getProcResourceMasks();
+  const InstrDesc &Desc = IR.getInstruction()->getDesc();
+  UsedResources.clear();
+
+  // Identify the resources consumed by this instruction.
+  for (const std::pair<uint64_t, ResourceUsage> Resource : Desc.Resources) {
+    // Skip zero-cycle resources (i.e., unused resources).
+    if (!Resource.second.size())
+      continue;
+    double Cycles = static_cast<double>(Resource.second.size());
+    unsigned Index = std::distance(
+        Masks.begin(), std::find(Masks.begin(), Masks.end(), Resource.first));
+    const MCProcResourceDesc &ProcResource = *SM.getProcResource(Index);
+    unsigned NumUnits = ProcResource.NumUnits;
+    if (!ProcResource.SubUnitsIdxBegin) {
+      // The number of cycles consumed by each unit.
+      Cycles /= NumUnits;
+      for (unsigned I = 0, E = NumUnits; I < E; ++I) {
+        ResourceRef ResourceUnit = std::make_pair(Index, 1U << I);
+        UsedResources.emplace_back(std::make_pair(ResourceUnit, Cycles));
+      }
+      continue;
+    }
+
+    // This is a group. Obtain the set of resources contained in this
+    // group. Some of these resources may implement multiple units.
+    // Uniformly distribute Cycles across all of the units.
+    for (unsigned I1 = 0; I1 < NumUnits; ++I1) {
+      unsigned SubUnitIdx = ProcResource.SubUnitsIdxBegin[I1];
+      const MCProcResourceDesc &SubUnit = *SM.getProcResource(SubUnitIdx);
+      // Compute the number of cycles consumed by each resource unit.
+      double RUCycles = Cycles / (NumUnits * SubUnit.NumUnits);
+      for (unsigned I2 = 0, E2 = SubUnit.NumUnits; I2 < E2; ++I2) {
+        ResourceRef ResourceUnit = std::make_pair(SubUnitIdx, 1U << I2);
+        UsedResources.emplace_back(std::make_pair(ResourceUnit, RUCycles));
+      }
+    }
+  }
+
+  // Send a fake instruction issued event to all the views.
+  HWInstructionIssuedEvent Event(IR, UsedResources);
+  notifyEvent(Event);
+  return ErrorSuccess();
+}
+
+} // namespace mca
Index: llvm/trunk/tools/llvm-mca/lib/Stages/RetireStage.cpp
===================================================================
--- llvm/trunk/tools/llvm-mca/lib/Stages/RetireStage.cpp
+++ llvm/trunk/tools/llvm-mca/lib/Stages/RetireStage.cpp
@@ -0,0 +1,62 @@
+//===---------------------- RetireStage.cpp ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the retire stage of an instruction pipeline.
+/// The RetireStage represents the process logic that interacts with the
+/// simulated RetireControlUnit hardware.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Stages/RetireStage.h"
+#include "HWEventListener.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "llvm-mca"
+
+namespace mca {
+
+llvm::Error RetireStage::cycleStart() {
+  if (RCU.isEmpty())
+    return llvm::ErrorSuccess();
+
+  const unsigned MaxRetirePerCycle = RCU.getMaxRetirePerCycle();
+  unsigned NumRetired = 0;
+  while (!RCU.isEmpty()) {
+    if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle)
+      break;
+    const RetireControlUnit::RUToken &Current = RCU.peekCurrentToken();
+    if (!Current.Executed)
+      break;
+    RCU.consumeCurrentToken();
+    notifyInstructionRetired(Current.IR);
+    NumRetired++;
+  }
+
+  return llvm::ErrorSuccess();
+}
+
+llvm::Error RetireStage::execute(InstRef &IR) {
+  RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID());
+  return llvm::ErrorSuccess();
+}
+
+void RetireStage::notifyInstructionRetired(const InstRef &IR) {
+  LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n');
+  llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
+  const Instruction &Inst = *IR.getInstruction();
+  const InstrDesc &Desc = Inst.getDesc();
+
+  bool ShouldFreeRegs = !(Desc.isZeroLatency() && Inst.isDependencyBreaking());
+  for (const std::unique_ptr<WriteState> &WS : Inst.getDefs())
+    PRF.removeRegisterWrite(*WS.get(), FreedRegs, ShouldFreeRegs);
+  notifyEvent(HWInstructionRetiredEvent(IR, FreedRegs));
+}
+
+} // namespace mca
Index: llvm/trunk/tools/llvm-mca/lib/Stages/Stage.cpp
===================================================================
--- llvm/trunk/tools/llvm-mca/lib/Stages/Stage.cpp
+++ llvm/trunk/tools/llvm-mca/lib/Stages/Stage.cpp
@@ -0,0 +1,27 @@
+//===---------------------- Stage.cpp ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines a stage.
+/// A chain of stages compose an instruction pipeline.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Stages/Stage.h"
+
+namespace mca {
+
+// Pin the vtable here in the implementation file.
+Stage::~Stage() = default;
+
+void Stage::addListener(HWEventListener *Listener) {
+  Listeners.insert(Listener);
+}
+
+} // namespace mca
Index: llvm/trunk/tools/llvm-mca/lib/Support.cpp
===================================================================
--- llvm/trunk/tools/llvm-mca/lib/Support.cpp
+++ llvm/trunk/tools/llvm-mca/lib/Support.cpp
@@ -0,0 +1,79 @@
+//===--------------------- Support.cpp --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements a few helper functions used by various pipeline
+/// components.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Support.h"
+#include "llvm/MC/MCSchedule.h"
+
+namespace mca {
+
+using namespace llvm;
+
+void computeProcResourceMasks(const MCSchedModel &SM,
+                              SmallVectorImpl<uint64_t> &Masks) {
+  unsigned ProcResourceID = 0;
+
+  // Create a unique bitmask for every processor resource unit.
+  // Skip resource at index 0, since it always references 'InvalidUnit'.
+  Masks.resize(SM.getNumProcResourceKinds());
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    if (Desc.SubUnitsIdxBegin)
+      continue;
+    Masks[I] = 1ULL << ProcResourceID;
+    ProcResourceID++;
+  }
+
+  // Create a unique bitmask for every processor resource group.
+  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+    if (!Desc.SubUnitsIdxBegin)
+      continue;
+    Masks[I] = 1ULL << ProcResourceID;
+    for (unsigned U = 0; U < Desc.NumUnits; ++U) {
+      uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]];
+      Masks[I] |= OtherMask;
+    }
+    ProcResourceID++;
+  }
+}
+
+double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
+                               unsigned NumMicroOps,
+                               ArrayRef<unsigned> ProcResourceUsage) {
+  // The block throughput is bounded from above by the hardware dispatch
+  // throughput. That is because the DispatchWidth is an upper bound on the
+  // number of opcodes that can be part of a single dispatch group.
+  double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
+
+  // The block throughput is also limited by the amount of hardware parallelism.
+  // The number of available resource units affects the resource pressure
+  // distribution, as well as how many blocks can be executed every cycle.
+  for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+    unsigned ResourceCycles = ProcResourceUsage[I];
+    if (!ResourceCycles)
+      continue;
+
+    const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
+    double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits;
+    Max = std::max(Max, Throughput);
+  }
+
+  // The block reciprocal throughput is computed as the MAX of:
+  //  - (NumMicroOps / DispatchWidth)
+  //  - (ResourceCycles / NumUnits) for every consumed processor resource.
+  return Max;
+}
+
+} // namespace mca
Index: llvm/trunk/tools/llvm-mca/llvm-mca.cpp
===================================================================
--- llvm/trunk/tools/llvm-mca/llvm-mca.cpp
+++ llvm/trunk/tools/llvm-mca/llvm-mca.cpp
@@ -22,11 +22,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeRegion.h"
-#include "Context.h"
-#include "FetchStage.h"
-#include "InstructionTables.h"
-#include "Pipeline.h"
 #include "PipelinePrinter.h"
+#include "Stages/FetchStage.h"
+#include "Stages/InstructionTables.h"
 #include "Views/DispatchStatistics.h"
 #include "Views/InstructionInfoView.h"
 #include "Views/RegisterFileStatistics.h"
@@ -35,6 +33,8 @@
 #include "Views/SchedulerStatistics.h"
 #include "Views/SummaryView.h"
 #include "Views/TimelineView.h"
+#include "include/Context.h"
+#include "include/Pipeline.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCObjectFileInfo.h"