Index: include/llvm/CodeGen/MachineCombinerPattern.h
===================================================================
--- include/llvm/CodeGen/MachineCombinerPattern.h
+++ include/llvm/CodeGen/MachineCombinerPattern.h
@@ -21,6 +21,12 @@
 ///
 ///
 namespace MachineCombinerPattern {
+
+// Encode optional information into the enum value of each pattern.
+enum MC_COST_CALCULATION : int {
+  USE_SLACK = 1 << 31
+};
+
 // Forward declaration
 enum MC_PATTERN : int {
   // These are commutative variants for reassociating a computation chain. See
@@ -29,10 +35,10 @@
   MC_REASSOC_AX_YB = 1,
   MC_REASSOC_XA_BY = 2,
   MC_REASSOC_XA_YB = 3,
+  LAST_REASSOC_PATTERN = MC_REASSOC_XA_YB,
 
   /// Enumeration of instruction pattern supported by AArch64 machine combiner
-  MC_NONE,
-  MC_MULADDW_OP1,
+  MC_MULADDW_OP1 = (LAST_REASSOC_PATTERN + 1) | USE_SLACK,
   MC_MULADDW_OP2,
   MC_MULSUBW_OP1,
   MC_MULSUBW_OP2,
Index: lib/CodeGen/MachineCombiner.cpp
===================================================================
--- lib/CodeGen/MachineCombiner.cpp
+++ lib/CodeGen/MachineCombiner.cpp
@@ -71,7 +71,7 @@
                                MachineTraceMetrics::Trace BlockTrace,
                                SmallVectorImpl<MachineInstr *> &InsInstrs,
                                DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-                               bool NewCodeHasLessInsts);
+                               bool NewCodeHasLessInsts, bool UseSlack);
   bool preservesResourceLen(MachineBasicBlock *MBB,
                             MachineTraceMetrics::Trace BlockTrace,
                             SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -222,13 +222,15 @@
 /// If the new sequence has an equal length critical path but does not reduce
 /// the number of instructions (NewCodeHasLessInsts is false), then it is not
 /// considered an improvement. The slack is the number of cycles Root can be
-/// delayed before the critical patch becomes longer.
+/// delayed before the critical path becomes longer. Slack may optionally be
+/// excluded from the calculation to provide a more conservative estimate of
+/// the original critical path length.
 bool MachineCombiner::improvesCriticalPathLen(
     MachineBasicBlock *MBB, MachineInstr *Root,
     MachineTraceMetrics::Trace BlockTrace,
     SmallVectorImpl<MachineInstr *> &InsInstrs,
     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
-    bool NewCodeHasLessInsts) {
+    bool NewCodeHasLessInsts, bool UseSlack) {
   assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
          "Missing machine model\n");
 
@@ -242,7 +244,7 @@
   // Get depth, latency and slack of Root.
   unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
   unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
-  unsigned RootSlack = BlockTrace.getInstrSlack(Root);
+  unsigned RootSlack = UseSlack ? BlockTrace.getInstrSlack(Root) : 0;
 
   DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
         dbgs() << " NewRootDepth: " << NewRootDepth
@@ -387,8 +389,9 @@
       // resource pressure.
       if (doSubstitute(NewInstCount, OldInstCount) ||
           (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
-                                    InstrIdxForVirtReg,
-                                    NewInstCount < OldInstCount) &&
+                                    InstrIdxForVirtReg,
+                                    NewInstCount < OldInstCount,
+                                    P & MachineCombinerPattern::USE_SLACK) &&
            preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
         for (auto *InstrPtr : InsInstrs)
           MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
Index: test/CodeGen/X86/machine-combiner.ll
===================================================================
--- test/CodeGen/X86/machine-combiner.ll
+++ test/CodeGen/X86/machine-combiner.ll
@@ -632,10 +632,10 @@
 ; AVX-NEXT:    callq bar
 ; AVX-NEXT:    vmovsd %xmm0, (%rsp)
 ; AVX-NEXT:    callq bar
-; AVX-NEXT:    vmovsd (%rsp), %xmm1
-; AVX:         vaddsd 8(%rsp), %xmm1, %xmm1
+; AVX-NEXT:    vmovsd 8(%rsp), %xmm1
+; AVX:         vaddsd 16(%rsp), %xmm1, %xmm1
+; AVX-NEXT:    vaddsd (%rsp), %xmm0, %xmm0
 ; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vaddsd 16(%rsp), %xmm0, %xmm0
 
   %x0 = call double @bar()
   %x1 = call double @bar()
@@ -656,9 +656,10 @@
 ; AVX-NEXT:    callq bar
 ; AVX-NEXT:    vmovsd %xmm0, (%rsp)
 ; AVX-NEXT:    callq bar
+; AVX-NEXT:    vmovsd 8(%rsp), %xmm1
+; AVX:         vaddsd 16(%rsp), %xmm1, %xmm1
 ; AVX-NEXT:    vaddsd (%rsp), %xmm0, %xmm0
-; AVX-NEXT:    vaddsd 8(%rsp), %xmm0, %xmm0
-; AVX-NEXT:    vaddsd 16(%rsp), %xmm0, %xmm0
+; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
 
   %x0 = call double @bar()
   %x1 = call double @bar()
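
A note on the encoding (not part of the patch): the tagged patterns remain
ordinary MC_PATTERN enum values, which is what lets combineInstructions test
P & MachineCombinerPattern::USE_SLACK directly. The sketch below is a minimal
standalone illustration of that bit-packing under the patch's scheme; the
enumerator names are copied from the diff, while the main() driver and its
output are purely illustrative.

// Minimal sketch, not part of the patch: demonstrates the USE_SLACK
// tagging scheme from the MachineCombinerPattern.h hunk above.
#include <cstdio>
#include <initializer_list>

enum MC_COST_CALCULATION : int {
  USE_SLACK = 1 << 31 // as in the patch; a portability-minded variant
                      // would form the mask from an unsigned value
};

enum MC_PATTERN : int {
  MC_REASSOC_AX_BY = 0,
  MC_REASSOC_AX_YB = 1,
  MC_REASSOC_XA_BY = 2,
  MC_REASSOC_XA_YB = 3,
  LAST_REASSOC_PATTERN = MC_REASSOC_XA_YB,
  // The first target pattern sets the high bit explicitly; every
  // enumerator after it inherits the bit via the usual +1 increment.
  MC_MULADDW_OP1 = (LAST_REASSOC_PATTERN + 1) | USE_SLACK,
  MC_MULADDW_OP2,
};

int main() {
  for (int P : {MC_REASSOC_AX_BY, MC_MULADDW_OP1, MC_MULADDW_OP2}) {
    bool UseSlack = (P & USE_SLACK) != 0; // the test the combiner performs
    int Index = P & ~USE_SLACK;           // pattern index with the tag stripped
    std::printf("pattern %d: UseSlack = %d\n", Index, UseSlack ? 1 : 0);
  }
  return 0;
}

This prints UseSlack = 1 for both multiply-add patterns and 0 for the
reassociation pattern: the tagged (AArch64) patterns keep the old
slack-inclusive cost calculation, while the untagged reassociation patterns
now get RootSlack = 0, the more conservative critical-path estimate the new
comment describes.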