diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1498,7 +1498,7 @@
                                 : BaseOp->getIndex() < RHS.BaseOp->getIndex();
 
       if (Offset != RHS.Offset)
-        return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
+        return Offset < RHS.Offset;
 
       return SU->NodeNum < RHS.SU->NodeNum;
     }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -89,6 +89,12 @@
   /// if there is a corresponding unscaled variant available.
   static Optional<unsigned> getUnscaledLdSt(unsigned Opc);
 
+  /// Scaling factor for (scaled or unscaled) load or store.
+  static int getMemScale(unsigned Opc);
+  static int getMemScale(const MachineInstr &MI) {
+    return getMemScale(MI.getOpcode());
+  }
+
   /// Returns the index for the immediate for a given instruction.
   static unsigned getLoadStoreImmIdx(unsigned Opc);
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2230,54 +2230,82 @@
   return true;
 }
 
-static unsigned getOffsetStride(unsigned Opc) {
+// Scaling factor for unscaled load or store.
+int AArch64InstrInfo::getMemScale(unsigned Opc) {
   switch (Opc) {
   default:
-    return 0;
-  case AArch64::LDURQi:
-  case AArch64::STURQi:
-    return 16;
-  case AArch64::LDURXi:
-  case AArch64::LDURDi:
-  case AArch64::STURXi:
-  case AArch64::STURDi:
-    return 8;
-  case AArch64::LDURWi:
+    llvm_unreachable("Opcode has unknown scale!");
+  case AArch64::LDRBBui:
+  case AArch64::LDURBBi:
+  case AArch64::LDRSBWui:
+  case AArch64::LDURSBWi:
+  case AArch64::STRBBui:
+  case AArch64::STURBBi:
+    return 1;
+  case AArch64::LDRHHui:
+  case AArch64::LDURHHi:
+  case AArch64::LDRSHWui:
+  case AArch64::LDURSHWi:
+  case AArch64::STRHHui:
+  case AArch64::STURHHi:
+    return 2;
+  case AArch64::LDRSui:
   case AArch64::LDURSi:
+  case AArch64::LDRSWui:
   case AArch64::LDURSWi:
-  case AArch64::STURWi:
+  case AArch64::LDRWui:
+  case AArch64::LDURWi:
+  case AArch64::STRSui:
   case AArch64::STURSi:
+  case AArch64::STRWui:
+  case AArch64::STURWi:
+  case AArch64::LDPSi:
+  case AArch64::LDPSWi:
+  case AArch64::LDPWi:
+  case AArch64::STPSi:
+  case AArch64::STPWi:
     return 4;
+  case AArch64::LDRDui:
+  case AArch64::LDURDi:
+  case AArch64::LDRXui:
+  case AArch64::LDURXi:
+  case AArch64::STRDui:
+  case AArch64::STURDi:
+  case AArch64::STRXui:
+  case AArch64::STURXi:
+  case AArch64::LDPDi:
+  case AArch64::LDPXi:
+  case AArch64::STPDi:
+  case AArch64::STPXi:
+    return 8;
+  case AArch64::LDRQui:
+  case AArch64::LDURQi:
+  case AArch64::STRQui:
+  case AArch64::STURQi:
+  case AArch64::LDPQi:
+  case AArch64::STPQi:
+  case AArch64::STGOffset:
+  case AArch64::STZGOffset:
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+  case AArch64::STGPi:
+    return 16;
   }
 }
 
 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
 // scaled.
 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
-  unsigned OffsetStride = getOffsetStride(Opc);
-  if (OffsetStride == 0)
-    return false;
+  int Scale = AArch64InstrInfo::getMemScale(Opc);
+
   // If the byte-offset isn't a multiple of the stride, we can't scale this
   // offset.
-  if (Offset % OffsetStride != 0)
+  if (Offset % Scale != 0)
     return false;
 
   // Convert the byte-offset used by unscaled into an "element" offset used
   // by the scaled pair load/store instructions.
-  Offset /= OffsetStride;
-  return true;
-}
-
-// Unscale the scaled offsets. Returns false if the scaled offset can't be
-// unscaled.
-static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
-  unsigned OffsetStride = getOffsetStride(Opc);
-  if (OffsetStride == 0)
-    return false;
-
-  // Convert the "element" offset used by scaled pair load/store instructions
-  // into the byte-offset used by unscaled.
-  Offset *= OffsetStride;
+  Offset /= Scale;
   return true;
 }
 
@@ -2308,15 +2336,17 @@
   int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
   int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
   assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
-  // Get the byte-offset from the object offset.
-  if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
+  // Convert to scaled object offsets.
+  int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
+  if (ObjectOffset1 % Scale1 != 0)
     return false;
+  ObjectOffset1 /= Scale1;
+  int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
+  if (ObjectOffset2 % Scale2 != 0)
+    return false;
+  ObjectOffset2 /= Scale2;
   ObjectOffset1 += Offset1;
   ObjectOffset2 += Offset2;
-  // Get the "element" index in the object.
-  if (!scaleOffset(Opcode1, ObjectOffset1) ||
-      !scaleOffset(Opcode2, ObjectOffset2))
-    return false;
   return ObjectOffset1 + 1 == ObjectOffset2;
 }
 
@@ -2376,7 +2406,7 @@
   // The caller should already have ordered First/SecondLdSt by offset.
   // Note: except for non-equal frame index bases
   if (BaseOp1.isFI()) {
-    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
+    assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
            "Caller should have ordered offsets.");
 
     const MachineFrameInfo &MFI =
@@ -2385,8 +2415,7 @@
                            BaseOp2.getIndex(), Offset2, SecondOpc);
   }
 
-  assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
-         "Caller should have ordered offsets.");
+  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
 
   return Offset1 + 1 == Offset2;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -230,69 +230,6 @@
   }
 }
 
-// Scaling factor for unscaled load or store.
-static int getMemScale(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    llvm_unreachable("Opcode has unknown scale!");
-  case AArch64::LDRBBui:
-  case AArch64::LDURBBi:
-  case AArch64::LDRSBWui:
-  case AArch64::LDURSBWi:
-  case AArch64::STRBBui:
-  case AArch64::STURBBi:
-    return 1;
-  case AArch64::LDRHHui:
-  case AArch64::LDURHHi:
-  case AArch64::LDRSHWui:
-  case AArch64::LDURSHWi:
-  case AArch64::STRHHui:
-  case AArch64::STURHHi:
-    return 2;
-  case AArch64::LDRSui:
-  case AArch64::LDURSi:
-  case AArch64::LDRSWui:
-  case AArch64::LDURSWi:
-  case AArch64::LDRWui:
-  case AArch64::LDURWi:
-  case AArch64::STRSui:
-  case AArch64::STURSi:
-  case AArch64::STRWui:
-  case AArch64::STURWi:
-  case AArch64::LDPSi:
-  case AArch64::LDPSWi:
-  case AArch64::LDPWi:
-  case AArch64::STPSi:
-  case AArch64::STPWi:
-    return 4;
-  case AArch64::LDRDui:
-  case AArch64::LDURDi:
-  case AArch64::LDRXui:
-  case AArch64::LDURXi:
-  case AArch64::STRDui:
-  case AArch64::STURDi:
-  case AArch64::STRXui:
-  case AArch64::STURXi:
-  case AArch64::LDPDi:
-  case AArch64::LDPXi:
-  case AArch64::STPDi:
-  case AArch64::STPXi:
-    return 8;
-  case AArch64::LDRQui:
-  case AArch64::LDURQi:
-  case AArch64::STRQui:
-  case AArch64::STURQi:
-  case AArch64::LDPQi:
-  case AArch64::STPQi:
-  case AArch64::STGOffset:
-  case AArch64::STZGOffset:
-  case AArch64::ST2GOffset:
-  case AArch64::STZ2GOffset:
-  case AArch64::STGPi:
-    return 16;
-  }
-}
-
 static unsigned getMatchingNonSExtOpcode(unsigned Opc,
                                          bool *IsValidLdStrOpc = nullptr) {
   if (IsValidLdStrOpc)
@@ -603,7 +540,7 @@
   // ST*G and all paired ldst have the same scale in pre/post-indexed variants
   // as in the "unsigned offset" variant.
   // All other pre/post indexed ldst instructions are unscaled.
-  Scale = (IsTagStore || IsPaired) ? getMemScale(MI) : 1;
+  Scale = (IsTagStore || IsPaired) ? AArch64InstrInfo::getMemScale(MI) : 1;
 
   if (IsPaired) {
     MinOffset = -64;
@@ -635,8 +572,8 @@
                                   MachineInstr &StoreInst,
                                   const AArch64InstrInfo *TII) {
   assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
-  int LoadSize = getMemScale(LoadInst);
-  int StoreSize = getMemScale(StoreInst);
+  int LoadSize = TII->getMemScale(LoadInst);
+  int StoreSize = TII->getMemScale(StoreInst);
   int UnscaledStOffset = TII->isUnscaledLdSt(StoreInst)
                              ? getLdStOffsetOp(StoreInst).getImm()
                              : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
@@ -746,7 +683,7 @@
   unsigned Opc = I->getOpcode();
   bool IsScaled = !TII->isUnscaledLdSt(Opc);
-  int OffsetStride = IsScaled ? 1 : getMemScale(*I);
+  int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
 
   bool MergeForward = Flags.getMergeForward();
 
   // Insert our new paired instruction after whichever of the paired
@@ -853,7 +790,7 @@
   unsigned Opc =
       SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
   bool IsUnscaled = TII->isUnscaledLdSt(Opc);
-  int OffsetStride = IsUnscaled ? getMemScale(*I) : 1;
+  int OffsetStride = IsUnscaled ? TII->getMemScale(*I) : 1;
 
   bool MergeForward = Flags.getMergeForward();
 
@@ -938,11 +875,11 @@
   // We're trying to pair instructions that differ in how they are scaled. If
   // I is scaled then scale the offset of Paired accordingly. Otherwise, do
   // the opposite (i.e., make Paired's offset unscaled).
-  int MemSize = getMemScale(*Paired);
+  int MemSize = TII->getMemScale(*Paired);
   if (PairedIsUnscaled) {
     // If the unscaled offset isn't a multiple of the MemSize, we can't
     // pair the operations together.
-    assert(!(PairedOffset % getMemScale(*Paired)) &&
+    assert(!(PairedOffset % TII->getMemScale(*Paired)) &&
            "Offset should be a multiple of the stride!");
     PairedOffset /= MemSize;
   } else {
@@ -967,9 +904,9 @@
   int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
   // Scale the immediate offset, if necessary.
   if (TII->isUnscaledLdSt(RtMI->getOpcode())) {
-    assert(!(OffsetImm % getMemScale(*RtMI)) &&
+    assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
            "Unscaled offset cannot be scaled.");
-    OffsetImm /= getMemScale(*RtMI);
+    OffsetImm /= TII->getMemScale(*RtMI);
   }
 
   // Construct the new instruction.
@@ -1069,8 +1006,8 @@
   MachineBasicBlock::iterator NextI = LoadI;
   ++NextI;
 
-  int LoadSize = getMemScale(*LoadI);
-  int StoreSize = getMemScale(*StoreI);
+  int LoadSize = TII->getMemScale(*LoadI);
+  int StoreSize = TII->getMemScale(*StoreI);
   Register LdRt = getLdStRegOp(*LoadI).getReg();
   const MachineOperand &StMO = getLdStRegOp(*StoreI);
   Register StRt = getLdStRegOp(*StoreI).getReg();
@@ -1489,7 +1426,7 @@
   Register Reg = getLdStRegOp(FirstMI).getReg();
   Register BaseReg = getLdStBaseOp(FirstMI).getReg();
   int Offset = getLdStOffsetOp(FirstMI).getImm();
-  int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
+  int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
   bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
 
   Optional<bool> MaybeCanRename = None;
@@ -1534,7 +1471,7 @@
       // We're trying to pair instructions that differ in how they are scaled.
       // If FirstMI is scaled then scale the offset of MI accordingly.
      // Otherwise, do the opposite (i.e., make MI's offset unscaled).
-      int MemSize = getMemScale(MI);
+      int MemSize = TII->getMemScale(MI);
       if (MIIsUnscaled) {
         // If the unscaled offset isn't a multiple of the MemSize, we can't
         // pair the operations together: bail and keep looking.
@@ -1792,7 +1729,7 @@
   MachineBasicBlock::iterator MBBI = I;
 
   Register BaseReg = getLdStBaseOp(MemMI).getReg();
-  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
+  int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
 
   // Scan forward looking for post-index opportunities. Updating instructions
   // can't be formed if the memory instruction doesn't have the offset we're
@@ -1963,7 +1900,7 @@
   // with Offset-1)
   bool IsUnscaled = TII->isUnscaledLdSt(MI);
   int Offset = getLdStOffsetOp(MI).getImm();
-  int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
+  int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
   // Allow one more for offset.
   if (Offset > 0)
     Offset -= OffsetStride;
@@ -2029,7 +1966,7 @@
   // The immediate in the load/store is scaled by the size of the memory
   // operation. The immediate in the add we're looking for,
   // however, is not, so adjust here.
-  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
 
   // Look forward to try to find a pre-index instruction. For example,
   // ldr x1, [x0, #64]
diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
--- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -113,9 +113,9 @@
 
 define void @bzero_26_stack() {
 ; CHECK-LABEL: bzero_26_stack:
-; CHECK: stp xzr, xzr, [sp, #8]
-; CHECK-NEXT: str xzr, [sp]
+; CHECK: stp xzr, xzr, [sp]
 ; CHECK-NEXT: strh wzr, [sp, #24]
+; CHECK-NEXT: str xzr, [sp, #16]
 ; CHECK-NEXT: bl something
   %buf = alloca [26 x i8], align 1
   %cast = bitcast [26 x i8]* %buf to i8*
@@ -259,9 +259,9 @@
 define void @memset_16_stack() {
 ; CHECK-LABEL: memset_16_stack:
 ; CHECK: mov x8, #-6148914691236517206
+; CHECK-NEXT: str x8, [sp, #-32]!
 ; CHECK-NEXT: mov x0, sp
 ; CHECK-NEXT: stp x8, x30, [sp, #8]
-; CHECK-NEXT: str x8, [sp]
 ; CHECK-NEXT: bl something
   %buf = alloca [16 x i8], align 1
   %cast = bitcast [16 x i8]* %buf to i8*
diff --git a/llvm/test/CodeGen/AArch64/cluster-frame-index.mir b/llvm/test/CodeGen/AArch64/cluster-frame-index.mir
--- a/llvm/test/CodeGen/AArch64/cluster-frame-index.mir
+++ b/llvm/test/CodeGen/AArch64/cluster-frame-index.mir
@@ -1,11 +1,10 @@
 #RUN: llc -mtriple=aarch64-- -mcpu=cyclone -run-pass machine-scheduler -o - %s | FileCheck %s
-...
 ---
 name: merge_stack
 # CHECK-LABEL: name: merge_stack
 tracksRegLiveness: true
 stack:
-  - { id: 0, size: 64, alignment: 8 }
+  - { id: 0, size: 16, alignment: 8 }
 body: |
   bb.0:
     liveins: $w0, $w1
@@ -25,3 +24,30 @@
   ; CHECK-NEXT: STRXui
   ; CHECK-NEXT: STRXui
   ; CHECK-NEXT: RET
+...
+---
+name: merge_fixedstack
+# CHECK-LABEL: name: merge_fixedstack
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, size: 16, alignment: 8, offset: -16 }
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = COPY $w1
+    undef %3.sub_32:gpr64 = ORRWrs $wzr, %0, 0
+    STRXui %3, %fixed-stack.0, 0 :: (store 8)
+    undef %5.sub_32:gpr64 = ORRWrs $wzr, %1, 0
+    STRXui %5, %fixed-stack.0, 1 :: (store 8)
+    RET_ReallyLR
+
+    ; CHECK: COPY
+    ; CHECK-NEXT: COPY
+    ; CHECK-NEXT: ORRWrs
+    ; CHECK-NEXT: ORRWrs
+    ; CHECK-NEXT: STRXui
+    ; CHECK-NEXT: STRXui
+    ; CHECK-NEXT: RET
+...
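The scheduler-side half of this change is the MemOpInfo comparison at the top of the patch: memory operations on the same frame index are now ordered by plain ascending offset, which is exactly the Offset1 <= Offset2 precondition the reworked shouldClusterMemOps asserts, and which the merge_stack and new merge_fixedstack checks above rely on. The following is a minimal illustrative sketch of that ordering, not LLVM code; the struct and field names are invented stand-ins for MemOpInfo, BaseOp->getIndex(), and SU->NodeNum, and the frame-index comparison (which the patch does not touch) is simplified to a plain less-than:

#include <cstdint>

struct FrameMemOpSketch {
  int FrameIndex;   // stand-in for BaseOp->getIndex()
  int64_t Offset;   // offset of the access within the frame object
  unsigned NodeNum; // stand-in for SU->NodeNum, the final tie-breaker

  bool operator<(const FrameMemOpSketch &RHS) const {
    if (FrameIndex != RHS.FrameIndex)
      return FrameIndex < RHS.FrameIndex; // simplified; the real code still
                                          // honours stack growth direction here
    if (Offset != RHS.Offset)
      return Offset < RHS.Offset; // ascending offset, no StackGrowsDown case
    return NodeNum < RHS.NodeNum;
  }
};

Sorting candidate mem ops with this predicate hands same-slot accesses to the clustering hook lowest-offset first.
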
diff --git a/llvm/test/CodeGen/AArch64/expand-select.ll b/llvm/test/CodeGen/AArch64/expand-select.ll
--- a/llvm/test/CodeGen/AArch64/expand-select.ll
+++ b/llvm/test/CodeGen/AArch64/expand-select.ll
@@ -4,20 +4,20 @@
 define void @foo(i32 %In1, <2 x i128> %In2, <2 x i128> %In3, <2 x i128> *%Out) {
 ; CHECK-LABEL: foo:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w0, #0x1
+; CHECK-NEXT: and w8, w0, #0x1
 ; CHECK-NEXT: fmov s0, wzr
-; CHECK-NEXT: ldp x10, x8, [sp, #8]
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: ldr x9, [sp]
+; CHECK-NEXT: ldp x10, x9, [sp, #8]
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: ldr x8, [sp]
 ; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT: fmov w11, s0
 ; CHECK-NEXT: tst w11, #0x1
 ; CHECK-NEXT: csel x11, x2, x6, ne
 ; CHECK-NEXT: csel x12, x3, x7, ne
-; CHECK-NEXT: csel x9, x4, x9, ne
+; CHECK-NEXT: csel x8, x4, x8, ne
 ; CHECK-NEXT: csel x10, x5, x10, ne
-; CHECK-NEXT: stp x9, x10, [x8, #16]
-; CHECK-NEXT: stp x11, x12, [x8]
+; CHECK-NEXT: stp x8, x10, [x9, #16]
+; CHECK-NEXT: stp x11, x12, [x9]
 ; CHECK-NEXT: ret
   %cond = and i32 %In1, 1
   %cbool = icmp eq i32 %cond, 0
@@ -31,25 +31,25 @@
 define void @bar(i32 %In1, <2 x i96> %In2, <2 x i96> %In3, <2 x i96> *%Out) {
 ; CHECK-LABEL: bar:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w10, w0, #0x1
+; CHECK-NEXT: and w9, w0, #0x1
 ; CHECK-NEXT: fmov s0, wzr
-; CHECK-NEXT: fmov s1, w10
+; CHECK-NEXT: fmov s1, w9
 ; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT: ldp x11, x8, [sp, #8]
-; CHECK-NEXT: ldr x9, [sp]
+; CHECK-NEXT: ldr x10, [sp]
 ; CHECK-NEXT: dup v1.4s, v0.s[0]
-; CHECK-NEXT: mov x10, v1.d[1]
-; CHECK-NEXT: lsr x10, x10, #32
-; CHECK-NEXT: tst w10, #0x1
-; CHECK-NEXT: fmov w10, s0
+; CHECK-NEXT: mov x9, v1.d[1]
+; CHECK-NEXT: lsr x9, x9, #32
+; CHECK-NEXT: tst w9, #0x1
+; CHECK-NEXT: fmov w9, s0
 ; CHECK-NEXT: csel x11, x5, x11, ne
-; CHECK-NEXT: csel x9, x4, x9, ne
-; CHECK-NEXT: tst w10, #0x1
-; CHECK-NEXT: csel x10, x3, x7, ne
+; CHECK-NEXT: csel x10, x4, x10, ne
+; CHECK-NEXT: tst w9, #0x1
+; CHECK-NEXT: csel x9, x3, x7, ne
 ; CHECK-NEXT: csel x12, x2, x6, ne
-; CHECK-NEXT: stur x9, [x8, #12]
+; CHECK-NEXT: stur x10, [x8, #12]
 ; CHECK-NEXT: str x12, [x8]
-; CHECK-NEXT: str w10, [x8, #8]
+; CHECK-NEXT: str w9, [x8, #8]
 ; CHECK-NEXT: str w11, [x8, #20]
 ; CHECK-NEXT: ret
   %cond = and i32 %In1, 1
diff --git a/llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll b/llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll
--- a/llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll
+++ b/llvm/test/CodeGen/AArch64/tailcall_misched_graph.ll
@@ -39,8 +39,8 @@
 
 ; Make sure that there is an dependence edge between fi#-2 and fi#-4.
 ; Without this edge the scheduler would be free to move the store accross the load.
-; COMMON: SU({{.*}}): [[VRB]]:gpr64 = LDRXui %fixed-stack.2
-; COMMON-NOT: SU
+; COMMON: {{^SU(.*)}}: [[VRB]]:gpr64 = LDRXui %fixed-stack.2
+; COMMON-NOT: {{^SU(.*)}}:
 ; COMMON: Successors:
 ; COMMON: SU([[DEPSTOREB:.*]]): Ord Latency=0
 ; COMMON: SU([[DEPSTOREA:.*]]): Ord Latency=0
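On the target side, the adjacency test in shouldClusterFI boils down to converting each frame object's byte offset into elements of the access size returned by getMemScale and then checking that the two accesses land in neighbouring elements. A small self-contained sketch follows; it is illustrative only, assuming a made-up two-entry opcode enum and function names that are not part of the patch:

#include <cstdint>

// Stand-ins for two access widths; the patch exposes the full per-opcode
// table as AArch64InstrInfo::getMemScale().
enum SketchOpcode { Store64, Store32 };

static int getMemScaleSketch(SketchOpcode Opc) {
  return Opc == Store64 ? 8 : 4; // access size in bytes
}

// Mirrors the reworked shouldClusterFI arithmetic: scale each object's byte
// offset by its access size, add the per-instruction element offset, and
// cluster only when the two accesses end up in adjacent elements.
static bool shouldClusterFISketch(int64_t ObjectOffset1, int64_t Offset1,
                                  SketchOpcode Opc1, int64_t ObjectOffset2,
                                  int64_t Offset2, SketchOpcode Opc2) {
  int Scale1 = getMemScaleSketch(Opc1);
  if (ObjectOffset1 % Scale1 != 0)
    return false;
  ObjectOffset1 /= Scale1;
  int Scale2 = getMemScaleSketch(Opc2);
  if (ObjectOffset2 % Scale2 != 0)
    return false;
  ObjectOffset2 /= Scale2;
  return ObjectOffset1 + Offset1 + 1 == ObjectOffset2 + Offset2;
}

// Worked example matching merge_fixedstack above: both STRXui instructions hit
// the same 16-byte fixed object at byte offset -16, with element offsets 0 and
// 1, so shouldClusterFISketch(-16, 0, Store64, -16, 1, Store64) returns true
// (-16/8 + 0 == -2 and -16/8 + 1 == -1, which are adjacent elements).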