diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
--- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
+++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
@@ -38,6 +38,7 @@
 
   /// Instructions that were issued, but not executed yet.
   SmallVector<InstRef, 4> IssuedInst;
+  InstRef LastIssuedInst; ///< The most recently issued instruction.
 
   /// Number of instructions issued in the current cycle.
   unsigned NumIssued;
diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
--- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -57,6 +57,37 @@
   return false;
 }
 
+// Return the number of cycles left before WS is written back. A write whose
+// remaining cycles are still UNKNOWN_CYCLES is assumed to take its full
+// latency, and negative values are clamped to zero.
+static unsigned getWriteCyclesLeft(const WriteState &WS) {
+  int CyclesLeft = WS.getCyclesLeft();
+  if (CyclesLeft == UNKNOWN_CYCLES)
+    CyclesLeft = WS.getLatency();
+  return CyclesLeft < 0 ? 0U : static_cast<unsigned>(CyclesLeft);
+}
+
+// Check that all writes of the First instruction occur before writes
+// of the Second instruction. Otherwise return the number of cycles
+// between the first write of the Second instruction and the last
+// write of the First instruction. Returns 0 when the writes are already
+// ordered, which includes the case where either instruction has no defs.
+static unsigned checkWritesOrder(const InstRef &First, const InstRef &Second) {
+  // Largest number of cycles left among the writes of First.
+  unsigned FirstWriteEnd = 0;
+  for (const WriteState &WS : First.getInstruction()->getDefs())
+    FirstWriteEnd = std::max(FirstWriteEnd, getWriteCyclesLeft(WS));
+
+  // Smallest number of cycles left among the writes of Second.
+  unsigned SecondWriteStart = ~0U;
+  for (const WriteState &WS : Second.getInstruction()->getDefs())
+    SecondWriteStart = std::min(SecondWriteStart, getWriteCyclesLeft(WS));
+
+  if (SecondWriteStart >= FirstWriteEnd)
+    return 0;
+  return FirstWriteEnd - SecondWriteStart;
+}
+
 /// Return a number of cycles left until register requirements of the
 /// instructions are met.
 static unsigned checkRegisterHazard(const RegisterFile &PRF,
@@ -118,6 +149,14 @@
         HWPressureEvent(HWPressureEvent::RESOURCES, IR));
   }
 
+  if (LastIssuedInst && !LastIssuedInst.getInstruction()->getDesc().RetireOOO) {
+    // Delay the instruction to ensure that writes occur in program
+    // order
+    if (unsigned StallWritesOrder = checkWritesOrder(LastIssuedInst, IR)) {
+      *StallCycles = StallWritesOrder;
+    }
+  }
+
   return *StallCycles == 0;
 }
 
@@ -211,6 +250,7 @@
   notifyInstructionExecute(IR, UsedResources, *this);
 
   IssuedInst.push_back(IR);
+  LastIssuedInst = IR;
   ++NumIssued;
 
   return llvm::ErrorSuccess();
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
@@ -35,7 +35,7 @@
 # CHECK-NEXT:  1      4     1.00           *            str	w0, [x21, x18, lsl #2]
 
 # CHECK:      Dynamic Dispatch Stall Cycles:
-# CHECK-NEXT: RAT     - Register unavailable:                      10  (47.6%)
+# CHECK-NEXT: RAT     - Register unavailable:                      8  (38.1%)
 # CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
 # CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
 # CHECK-NEXT: LQ      - Load queue full:                           0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
@@ -35,7 +35,7 @@
 # CHECK-NEXT:  1      4     1.00           *            str	w0, [x21, x18, lsl #2]
 
 # CHECK:      Dynamic Dispatch Stall Cycles:
-# CHECK-NEXT: RAT     - Register unavailable:                      10  (47.6%)
+# CHECK-NEXT: RAT     - Register unavailable:                      8  (38.1%)
 # CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
 # CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
 # CHECK-NEXT: LQ      - Load queue full:                           0
@@ -106,13 +106,13 @@
 # CHECK:      [0,0]     DeeER.    .    .    .   ldr	w4, [x2], #4
 # CHECK-NEXT: [0,1]     .DeeER    .    .    .   ldr	w5, [x3]
 # CHECK-NEXT: [0,2]     .   DeeeER.    .    .   madd	w0, w5, w4, w0
-# CHECK-NEXT: [0,3]     .   DeeE-R.    .    .   add	x3, x3, x13
+# CHECK-NEXT: [0,3]     .    DeeER.    .    .   add	x3, x3, x13
 # CHECK-NEXT: [0,4]     .    DeeER.    .    .   subs	x1, x1, #1
 # CHECK-NEXT: [0,5]     .    . DeeeER  .    .   str	w0, [x21, x18, lsl #2]
 # CHECK-NEXT: [1,0]     .    .  DeeER  .    .   ldr	w4, [x2], #4
 # CHECK-NEXT: [1,1]     .    .   DeeER .    .   ldr	w5, [x3]
 # CHECK-NEXT: [1,2]     .    .    . DeeeER  .   madd	w0, w5, w4, w0
-# CHECK-NEXT: [1,3]     .    .    . DeeE-R  .   add	x3, x3, x13
+# CHECK-NEXT: [1,3]     .    .    .  DeeER  .   add	x3, x3, x13
 # CHECK-NEXT: [1,4]     .    .    .  DeeER  .   subs	x1, x1, #1
 # CHECK-NEXT: [1,5]     .    .    .    DeeeER   str	w0, [x21, x18, lsl #2]
 
@@ -126,7 +126,7 @@
 # CHECK-NEXT: 0.     2     0.0    0.0    0.0       ldr	w4, [x2], #4
 # CHECK-NEXT: 1.     2     0.0    0.0    0.0       ldr	w5, [x3]
 # CHECK-NEXT: 2.     2     0.0    0.0    0.0       madd	w0, w5, w4, w0
-# CHECK-NEXT: 3.     2     0.0    0.0    1.0       add	x3, x3, x13
+# CHECK-NEXT: 3.     2     0.0    0.0    0.0       add	x3, x3, x13
 # CHECK-NEXT: 4.     2     0.0    0.0    0.0       subs	x1, x1, #1
 # CHECK-NEXT: 5.     2     0.0    0.0    0.0       str	w0, [x21, x18, lsl #2]
-# CHECK-NEXT:        2     0.0    0.0    0.2       <total>
+# CHECK-NEXT:        2     0.0    0.0    0.0       <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
@@ -10,12 +10,12 @@
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      12
-# CHECK-NEXT: Total Cycles:      18
+# CHECK-NEXT: Total Cycles:      20
 # CHECK-NEXT: Total uOps:        12
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.67
-# CHECK-NEXT: IPC:               0.67
+# CHECK-NEXT: uOps Per Cycle:    0.60
+# CHECK-NEXT: IPC:               0.60
 # CHECK-NEXT: Block RThroughput: 8.0
 
 # CHECK:      Instruction Info:
@@ -40,33 +40,37 @@
 # CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
 # CHECK-NEXT: LQ      - Load queue full:                           0
 # CHECK-NEXT: SQ      - Store queue full:                          0
-# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 5  (27.8%)
+# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 1  (5.0%)
 
 # CHECK:      Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
 # CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT:  0,              12  (66.7%)
-# CHECK-NEXT:  2,              6  (33.3%)
+# CHECK-NEXT:  0,              12  (60.0%)
+# CHECK-NEXT:  1,              4  (20.0%)
+# CHECK-NEXT:  2,              4  (20.0%)
 
 # CHECK:      Schedulers - number of cycles where we saw N micro opcodes issued:
 # CHECK-NEXT: [# issued], [# cycles]
-# CHECK-NEXT:  0,          12  (66.7%)
-# CHECK-NEXT:  2,          6  (33.3%)
+# CHECK-NEXT:  0,          12  (60.0%)
+# CHECK-NEXT:  1,          4  (20.0%)
+# CHECK-NEXT:  2,          4  (20.0%)
 
 # CHECK:      Scheduler's queue usage:
 # CHECK-NEXT: No scheduler resources used.
 
 # CHECK:      Retire Control Unit - number of cycles where we saw N instructions retired:
 # CHECK-NEXT: [# retired], [# cycles]
-# CHECK-NEXT:  0,           16  (88.9%)
-# CHECK-NEXT:  6,           2  (11.1%)
+# CHECK-NEXT:  0,           14  (70.0%)
+# CHECK-NEXT:  1,           2  (10.0%)
+# CHECK-NEXT:  2,           2  (10.0%)
+# CHECK-NEXT:  3,           2  (10.0%)
 
 # CHECK:      Total ROB Entries:                64
-# CHECK-NEXT: Max Used ROB Entries:             8  ( 12.5% )
-# CHECK-NEXT: Average Used ROB Entries per cy:  5  ( 7.8% )
+# CHECK-NEXT: Max Used ROB Entries:             7  ( 10.9% )
+# CHECK-NEXT: Average Used ROB Entries per cy:  2  ( 3.1% )
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    12
-# CHECK-NEXT: Max number of mappings used:         8
+# CHECK-NEXT: Max number of mappings used:         7
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0.0] - CortexA55UnitALU
@@ -96,21 +100,21 @@
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w7, w9, w0
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     01234567
+# CHECK-NEXT:                     0123456789
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeeeeER.    . .   sdiv	w12, w21, w0
-# CHECK-NEXT: [0,1]     DeeE-----R.    . .   add	w8, w8, #1
-# CHECK-NEXT: [0,2]     .DeeE----R.    . .   add	w1, w2, w0
-# CHECK-NEXT: [0,3]     .DeeE----R.    . .   add	w3, w4, #1
-# CHECK-NEXT: [0,4]     . DeeE---R.    . .   add	w5, w6, w0
-# CHECK-NEXT: [0,5]     . DeeE---R.    . .   add	w7, w9, w0
-# CHECK-NEXT: [1,0]     .    .  DeeeeeeeER   sdiv	w12, w21, w0
-# CHECK-NEXT: [1,1]     .    .  DeeE-----R   add	w8, w8, #1
-# CHECK-NEXT: [1,2]     .    .   DeeE----R   add	w1, w2, w0
-# CHECK-NEXT: [1,3]     .    .   DeeE----R   add	w3, w4, #1
-# CHECK-NEXT: [1,4]     .    .    DeeE---R   add	w5, w6, w0
-# CHECK-NEXT: [1,5]     .    .    DeeE---R   add	w7, w9, w0
+# CHECK:      [0,0]     DeeeeeeeER.    .   .   sdiv	w12, w21, w0
+# CHECK-NEXT: [0,1]     .    DeeER.    .   .   add	w8, w8, #1
+# CHECK-NEXT: [0,2]     .    DeeER.    .   .   add	w1, w2, w0
+# CHECK-NEXT: [0,3]     .    .DeeER    .   .   add	w3, w4, #1
+# CHECK-NEXT: [0,4]     .    .DeeER    .   .   add	w5, w6, w0
+# CHECK-NEXT: [0,5]     .    . DeeER   .   .   add	w7, w9, w0
+# CHECK-NEXT: [1,0]     .    .  DeeeeeeeER .   sdiv	w12, w21, w0
+# CHECK-NEXT: [1,1]     .    .    .  DeeER .   add	w8, w8, #1
+# CHECK-NEXT: [1,2]     .    .    .  DeeER .   add	w1, w2, w0
+# CHECK-NEXT: [1,3]     .    .    .   DeeER.   add	w3, w4, #1
+# CHECK-NEXT: [1,4]     .    .    .   DeeER.   add	w5, w6, w0
+# CHECK-NEXT: [1,5]     .    .    .    DeeER   add	w7, w9, w0
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -120,9 +124,9 @@
 
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     2     0.0    0.0    0.0       sdiv	w12, w21, w0
-# CHECK-NEXT: 1.     2     0.0    0.0    5.0       add	w8, w8, #1
-# CHECK-NEXT: 2.     2     0.0    0.0    4.0       add	w1, w2, w0
-# CHECK-NEXT: 3.     2     0.0    0.0    4.0       add	w3, w4, #1
-# CHECK-NEXT: 4.     2     0.0    0.0    3.0       add	w5, w6, w0
-# CHECK-NEXT: 5.     2     0.0    0.0    3.0       add	w7, w9, w0
-# CHECK-NEXT:        2     0.0    0.0    3.2       <total>
+# CHECK-NEXT: 1.     2     0.0    0.0    0.0       add	w8, w8, #1
+# CHECK-NEXT: 2.     2     0.0    0.0    0.0       add	w1, w2, w0
+# CHECK-NEXT: 3.     2     0.0    0.0    0.0       add	w3, w4, #1
+# CHECK-NEXT: 4.     2     0.0    0.0    0.0       add	w5, w6, w0
+# CHECK-NEXT: 5.     2     0.0    0.0    0.0       add	w7, w9, w0
+# CHECK-NEXT:        2     0.0    0.0    0.0       <total>