Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4203,8 +4203,13 @@
     // This is slightly expensive to compute for physical regs since
     // getMinimalPhysRegClass is slow.
     auto getRegClass = [&](unsigned Reg) {
-      return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
-                                              : TRI.getMinimalPhysRegClass(Reg);
+      if (Register::isVirtualRegister(Reg))
+        return MRI.getRegClass(Reg);
+      // For a physical register, report the class getCrossCopyRegClass picks
+      // for its minimal class rather than the minimal class itself.
+      // NOTE(review): greedy-spill-nzcv.mir expects the folded spill to be
+      // `STRXui $nzcv`; confirm storing NZCV with STRXui is really intended.
+      return TRI.getCrossCopyRegClass(TRI.getMinimalPhysRegClass(Reg));
     };
 
     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
Index: llvm/test/CodeGen/MIR/AArch64/greedy-spill-nzcv.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/MIR/AArch64/greedy-spill-nzcv.mir
@@ -0,0 +1,407 @@
+# RUN: llc -o - %s -mtriple=aarch64-- -run-pass=greedy | FileCheck %s --check-prefix=CHECK
+# Test that the greedy allocator spills and reloads $nzcv through a stack slot.
+
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-unknown-linux-gnu"
+
+  @g = external dso_local unnamed_addr global i8, align 4
+  @i = external dso_local unnamed_addr global i16, align 4
+  @e = external dso_local unnamed_addr global i8, align 4
+  @_MergedGlobals = private global <{ i8, [3 x i8], i32, i64 }> <{ i8 1, [3 x i8] zeroinitializer, i32 60885424, i64 4 }>, align 8
+  @_MergedGlobals.1 = private global <{ i1, [7 x i8], i64 }> zeroinitializer, align 8
+
+  @a = internal alias i8, getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 0)
+  @c = internal alias i32, getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 2)
+  @b = internal alias i64, getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 3)
+  @d = internal alias i1, getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 0)
+  @f = internal alias i64, getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 2)
+
+  define dso_local i32 @main() #0 {
+  entry:
+    ; CHECK: dead $xzr = SUBSXri [[LDRXui2:%.*]], 0, 0, implicit-def $nzcv
+    ; CHECK-NEXT: CCMPWi [[SUBWr:%.*]], 0, 4, 1, implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: STRXui $nzcv, [[STACK1:%.*]], 0 :: (store (s64) into [[STACK1]])
+    ; CHECK: $nzcv = LDRXui [[STACK1]], 0 :: (load (s64) from [[STACK1]])
+    %l.addr.i4 = alloca i64, align 8
+    br label %while.cond.outer
+
+  while.cond.outer:                                 ; preds = %aj.loopexit11.i, %entry
+    %.ph = phi i16 [ %13, %aj.loopexit11.i ], [ undef, %entry ]
+    %0 = load i64, i64* getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 2), align 8
+    %conv22.i = zext i8 undef to i64
+    br label %while.cond.i
+
+  while.cond.i:                                     ; preds = %aj.loopexit.i, %while.cond.outer
+    %1 = phi i16 [ 0, %aj.loopexit.i ], [ %.ph, %while.cond.outer ]
+    %tobool.not.i = icmp eq i16 %1, 0
+    br i1 %tobool.not.i, label %while.end.i, label %while.body.i
+
+  while.body.i:                                     ; preds = %while.cond.i
+    %2 = load i8, i8* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 0), align 4, !tbaa !0
+    %3 = load i64, i64* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 3), align 8, !tbaa !3
+    store i64 %3, i64* %l.addr.i4, align 8, !tbaa !3
+    %4 = load i8, i8* @e, align 4, !tbaa !0
+    %tobool.not.i4 = icmp eq i8 %4, 0
+    br i1 %tobool.not.i4, label %if.end35.i4, label %for.body
+
+  for.cond.i4:                                 ; preds = %if.end35.i4, %if.then8.i4, %if.then3.i4, %for.body
+    %5 = phi i8 [ %conv18.i4, %if.then8.i4 ], [ %6, %if.then3.i4 ], [ %6, %for.body ], [ undef, %if.end35.i4 ]
+    %m.1.i4 = phi i64* [ %m.0.i8.i3, %if.then8.i4 ], [ %m.0.i8.i3, %if.then3.i4 ], [ %m.0.i8.i3, %for.body ], [ %l.addr.i4, %if.end35.i4 ]
+    %p.1.i4 = phi i32 [ %mul.i4, %if.then8.i4 ], [ %p.0.i9.i3, %if.then3.i4 ], [ %p.0.i9.i3, %for.body ], [ undef, %if.end35.i4 ]
+    %j.addr.1.i4 = phi i8 [ %j.addr.0.i11.i3, %if.then8.i4 ], [ %j.addr.0.i11.i3, %if.then3.i4 ], [ %j.addr.0.i11.i3, %for.body ], [ undef, %if.end35.i4 ]
+    %tobool1.not.i4 = icmp eq i8 %j.addr.1.i4, 0
+    br i1 %tobool1.not.i4, label %if.end35.i4, label %for.body
+
+  for.body:                                         ; preds = %for.cond.i4, %while.body.i
+    %j.addr.0.i11.i3 = phi i8 [ %j.addr.1.i4, %for.cond.i4 ], [ 2, %while.body.i ]
+    %p.0.i9.i3 = phi i32 [ %p.1.i4, %for.cond.i4 ], [ 36, %while.body.i ]
+    %m.0.i8.i3 = phi i64* [ %m.1.i4, %for.cond.i4 ], [ undef, %while.body.i ]
+    %6 = phi i8 [ %5, %for.cond.i4 ], [ %4, %while.body.i ]
+    %7 = load i64, i64* %m.0.i8.i3, align 8, !tbaa !3
+    %conv.i4 = trunc i64 %7 to i8
+    store i8 %conv.i4, i8* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 0), align 4, !tbaa !0
+    br i1 undef, label %for.cond.i4, label %if.then3.i4
+
+  if.then3.i4:                                 ; preds = %for.body
+    %8 = load i8, i8* @g, align 4, !tbaa !0
+    %tobool5.not.i4 = icmp eq i8 %8, 0
+    %conv6.i4 = zext i8 %8 to i64
+    %cond.i4 = select i1 %tobool5.not.i4, i64 5, i64 %conv6.i4
+    %tobool7.not.i4 = icmp eq i64 %cond.i4, 0
+    br i1 %tobool7.not.i4, label %for.cond.i4, label %if.then8.i4
+
+  if.then8.i4:                                 ; preds = %if.then3.i4
+    %mul.i4 = mul i32 %p.0.i9.i3, 97
+    %tobool9.not.i4 = icmp eq i32 %mul.i4, 0
+    %tobool11.not.i4 = icmp eq i64 %7, 0
+    %not.tobool11.not.i4 = xor i1 %tobool11.not.i4, true
+    %not.tobool9.not.i4 = xor i1 %tobool9.not.i4, true
+    %tobool28.not.i4 = select i1 %not.tobool9.not.i4, i1 %not.tobool11.not.i4, i1 false
+    %9 = or i1 %tobool9.not.i4, %tobool11.not.i4
+    %conv13.i4 = zext i32 %mul.i4 to i64
+    %rem.lhs.trunc.i4 = select i1 %9, i16 66, i16 0
+    %rem11.i4 = srem i16 %rem.lhs.trunc.i4, %1
+    %10 = trunc i16 %rem11.i4 to i8
+    %conv18.i4 = mul i8 %6, %10
+    store i8 %conv18.i4, i8* @e, align 4, !tbaa !0
+    %spec.select12.i4 = select i1 %tobool9.not.i4, i64 %7, i64 %conv13.i4
+    store i64 %spec.select12.i4, i64* %m.0.i8.i3, align 8, !tbaa !3
+    %11 = trunc i64 %spec.select12.i4 to i8
+    %conv26.i4 = and i8 %8, %11
+    store i8 %conv26.i4, i8* @g, align 4, !tbaa !0
+    %.b.i = load i1, i1* getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 0), align 4
+    %12 = select i1 %.b.i, i64 11, i64 0
+    %cond33.i4 = select i1 %tobool28.not.i4, i64 %spec.select12.i4, i64 %12
+    store i64 %cond33.i4, i64* getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 2), align 8, !tbaa !3
+    br label %for.cond.i4
+
+  if.end35.i4:                                 ; preds = %for.cond.i4, %while.body.i
+    %tobool36.not.i4 = icmp eq i8 %2, 0
+    br i1 %tobool36.not.i4, label %aj.loopexit11.i, label %for.cond.i4
+
+  aj.loopexit11.i:                                  ; preds = %if.end35.i4
+    %13 = or i16 %.ph, 1
+    store i16 %13, i16* @i, align 4, !tbaa !5
+    br label %while.cond.outer
+
+  aj.loopexit.i:                                    ; preds = %for.cond19.preheader.i
+    store i32 %rem.peel.i, i32* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 2), align 4, !tbaa !7
+    store i64 %or23.peel.i.mux, i64* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 3), align 8, !tbaa !3
+    br label %while.cond.i
+
+  while.end.i:                                      ; preds = %while.cond.i
+    %14 = load i32, i32* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 2), align 4, !tbaa !7
+    %tobool17.not.i = icmp eq i32 %14, 0
+    br i1 %tobool17.not.i, label %foo.exit, label %for.cond19.preheader.i
+
+  for.cond19.preheader.i:                           ; preds = %while.end.i
+    %.pre22.i = load i64, i64* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 3), align 8, !tbaa !3
+    store i1 true, i1* getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 0), align 4
+    %rem.peel.i = srem i32 %14, 13
+    %or23.peel.i = or i64 %.pre22.i, %0
+    %tobool24.not.peel.i = icmp eq i64 %or23.peel.i, %conv22.i
+    %tobool24.not.peel.i.not = xor i1 %tobool24.not.peel.i, true
+    %brmerge = or i1 %tobool24.not.peel.i.not, undef
+    %or23.peel.i.mux = select i1 %tobool24.not.peel.i.not, i64 %or23.peel.i, i64 undef
+    br i1 %brmerge, label %aj.loopexit.i, label %foo.exit
+
+  foo.exit:                                         ; preds = %for.cond19.preheader.i, %while.end.i
+    ret i32 0
+  }
+
+  attributes #0 = { "target-cpu"="tsv110" }
+
+  !0 = !{!1, !1, i64 0}
+  !1 = !{!"omnipotent char", !2, i64 0}
+  !2 = !{!"Simple C/C++ TBAA"}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"long", !1, i64 0}
+  !5 = !{!6, !6, i64 0}
+  !6 = !{!"short", !1, i64 0}
+  !7 = !{!8, !8, i64 0}
+  !8 = !{!"int", !1, i64 0}
+
+...
+---
+name:            main
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr32 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: gpr64all }
+  - { id: 3, class: gpr32 }
+  - { id: 4, class: gpr32 }
+  - { id: 5, class: gpr32 }
+  - { id: 6, class: gpr32all }
+  - { id: 7, class: gpr64all }
+  - { id: 8, class: gpr32all }
+  - { id: 9, class: gpr32 }
+  - { id: 10, class: gpr32all }
+  - { id: 11, class: gpr32 }
+  - { id: 12, class: gpr64sp }
+  - { id: 13, class: gpr32 }
+  - { id: 14, class: gpr64common }
+  - { id: 15, class: gpr32common }
+  - { id: 16, class: gpr32all }
+  - { id: 17, class: gpr32 }
+  - { id: 18, class: gpr32common }
+  - { id: 19, class: gpr32 }
+  - { id: 20, class: gpr32 }
+  - { id: 21, class: gpr64 }
+  - { id: 22, class: gpr32all }
+  - { id: 23, class: gpr64common }
+  - { id: 24, class: gpr64 }
+  - { id: 25, class: gpr64all }
+  - { id: 26, class: gpr32common }
+  - { id: 27, class: gpr32 }
+  - { id: 28, class: gpr32 }
+  - { id: 29, class: gpr64all }
+  - { id: 30, class: gpr64common }
+  - { id: 31, class: gpr32 }
+  - { id: 32, class: gpr64 }
+  - { id: 33, class: gpr64common }
+  - { id: 34, class: gpr32 }
+  - { id: 35, class: gpr32 }
+  - { id: 36, class: gpr32 }
+  - { id: 37, class: gpr32all }
+  - { id: 38, class: gpr64sp }
+  - { id: 39, class: gpr32all }
+  - { id: 40, class: gpr64sp }
+  - { id: 41, class: gpr32common }
+  - { id: 42, class: gpr64common }
+  - { id: 43, class: gpr32 }
+  - { id: 44, class: gpr64common }
+  - { id: 45, class: gpr32 }
+  - { id: 46, class: gpr64common }
+  - { id: 47, class: gpr32 }
+  - { id: 48, class: gpr64common }
+  - { id: 49, class: gpr32 }
+  - { id: 50, class: gpr32 }
+  - { id: 51, class: gpr64 }
+  - { id: 52, class: gpr64 }
+  - { id: 53, class: gpr32 }
+  - { id: 54, class: gpr32common }
+  - { id: 55, class: gpr64 }
+  - { id: 56, class: gpr64common }
+  - { id: 57, class: gpr64 }
+  - { id: 58, class: gpr32 }
+  - { id: 59, class: gpr32 }
+  - { id: 60, class: gpr32 }
+  - { id: 61, class: gpr32 }
+  - { id: 62, class: gpr32 }
+  - { id: 63, class: gpr32 }
+  - { id: 64, class: gpr32 }
+  - { id: 65, class: gpr32 }
+  - { id: 66, class: gpr64common }
+  - { id: 67, class: gpr32 }
+  - { id: 68, class: gpr64 }
+  - { id: 69, class: gpr64common }
+  - { id: 70, class: gpr32common }
+  - { id: 71, class: gpr32 }
+  - { id: 72, class: gpr32 }
+  - { id: 73, class: gpr64 }
+  - { id: 74, class: gpr64 }
+  - { id: 75, class: gpr64 }
+  - { id: 76, class: gpr32 }
+  - { id: 77, class: gpr32 }
+  - { id: 78, class: gpr64common }
+  - { id: 79, class: gpr64 }
+  - { id: 80, class: gpr32common }
+  - { id: 81, class: gpr64common }
+  - { id: 82, class: gpr64common }
+  - { id: 83, class: gpr32 }
+  - { id: 84, class: gpr64common }
+  - { id: 85, class: gpr64 }
+  - { id: 86, class: gpr64common }
+  - { id: 87, class: gpr32 }
+  - { id: 88, class: gpr32 }
+  - { id: 89, class: gpr64 }
+  - { id: 90, class: gpr64 }
+  - { id: 91, class: gpr32 }
+  - { id: 92, class: gpr64 }
+  - { id: 93, class: gpr32 }
+  - { id: 94, class: gpr32 }
+  - { id: 95, class: gpr32 }
+  - { id: 96, class: gpr32 }
+  - { id: 97, class: gpr32 }
+  - { id: 98, class: gpr64 }
+  - { id: 99, class: gpr32 }
+  - { id: 100, class: gpr32all }
+  - { id: 101, class: gpr64common }
+  - { id: 102, class: gpr32all }
+  - { id: 103, class: gpr32all }
+  - { id: 104, class: gpr32common }
+  - { id: 105, class: gpr32 }
+  - { id: 106, class: gpr32 }
+  - { id: 107, class: gpr64sp }
+  - { id: 108, class: gpr32 }
+  - { id: 109, class: gpr32 }
+  - { id: 110, class: gpr32all }
+  - { id: 111, class: gpr32 }
+  - { id: 112, class: gpr64sp }
+  - { id: 113, class: gpr32 }
+frameInfo:
+  maxAlignment:    8
+  maxCallFrameSize: 0
+  localFrameSize:  8
+stack:
+  - { id: 0, name: l.addr.i4, size: 8, alignment: 8, local-offset: -8 }
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    %23:gpr64common = ADRP target-flags(aarch64-page) @_MergedGlobals.1 + 8
+    %82:gpr64common = ADRP target-flags(aarch64-page) @_MergedGlobals + 4
+    %84:gpr64common = ADRP target-flags(aarch64-page) @_MergedGlobals + 8
+    %86:gpr64common = ADRP target-flags(aarch64-page) @_MergedGlobals.1
+    %87:gpr32 = MOVi32imm 1
+    %101:gpr64common = MOVaddr target-flags(aarch64-page) @_MergedGlobals + 4, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals + 4
+    %88:gpr32 = MOVi32imm 1321528399
+    %30:gpr64common = MOVaddr target-flags(aarch64-page) @_MergedGlobals, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals
+    %95:gpr32 = MOVi32imm 13
+    %33:gpr64common = ADRP target-flags(aarch64-page) @e
+    %28:gpr32 = MOVi32imm 36
+    %81:gpr64common = ADRP target-flags(aarch64-page) @i
+    %44:gpr64common = ADRP target-flags(aarch64-page) @_MergedGlobals
+    %46:gpr64common = ADRP target-flags(aarch64-page) @g
+    undef %51.sub_32:gpr64 = MOVi32imm 5
+    %53:gpr32 = MOVi32imm 97
+    %59:gpr32 = MOVi32imm 66
+    undef %73.sub_32:gpr64 = MOVi32imm 11
+    %104:gpr32common = IMPLICIT_DEF
+    %69:gpr64common = MOVaddr target-flags(aarch64-page) @_MergedGlobals.1, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals.1
+
+  bb.1.while.cond.outer:
+    %24:gpr64 = LDRXui %23, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals.1 + 8 :: (dereferenceable load 8 from `i64* getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 2)`)
+    %105:gpr32 = COPY %104
+
+  bb.2.while.cond.i:
+    successors: %bb.13(0x7c000000), %bb.3(0x04000000)
+
+    $wzr = ANDSWri %105, 15, implicit-def $nzcv
+    Bcc 0, %bb.13, implicit $nzcv
+    B %bb.3
+
+  bb.3.while.body.i:
+    %31:gpr32 = LDRBBui %30, 0 :: (dereferenceable load 1 from `i8* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 0)`, align 8, !tbaa !0)
+    %32:gpr64 = LDRXui %30, 1 :: (dereferenceable load 8 from `i64* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 3)`, !tbaa !3)
+    STRXui %32, %stack.0.l.addr.i4, 0 :: (store 8 into %ir.l.addr.i4, !tbaa !3)
+    %106:gpr32 = LDRBBui %33, target-flags(aarch64-pageoff, aarch64-nc) @e :: (dereferenceable load 1 from @e, align 4, !tbaa !0)
+    CBZW %106, %bb.9
+
+  bb.4:
+    %109:gpr32 = MOVi32imm 2
+    undef %56.sub_32:gpr64common = COPY %28
+    %107:gpr64sp = IMPLICIT_DEF
+    B %bb.6
+
+  bb.5.for.cond.i4:
+    $wzr = ANDSWri %109, 7, implicit-def $nzcv
+    Bcc 0, %bb.9, implicit $nzcv
+    B %bb.6
+
+  bb.6.for.body:
+    %42:gpr64common = LDRXui %107, 0 :: (load 8 from %ir.m.0.i8.i3, !tbaa !3)
+    STRBBui %42.sub_32, %44, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals :: (store 1 into `i8* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 0)`, align 8, !tbaa !0)
+    CBZW $wzr, %bb.7
+    B %bb.5
+
+  bb.7.if.then3.i4:
+    undef %48.sub_32:gpr64common = LDRBBui %46, target-flags(aarch64-pageoff, aarch64-nc) @g :: (dereferenceable load 1 from @g, align 4, !tbaa !0)
+    dead $wzr = SUBSWri %48.sub_32, 0, 0, implicit-def $nzcv
+    %52:gpr64 = CSELXr %51, %48, 0, implicit $nzcv
+    CBNZX %52, %bb.8
+    B %bb.5
+
+  bb.8.if.then8.i4:
+    undef %56.sub_32:gpr64common = MADDWrrr %56.sub_32, %53, $wzr
+    dead $xzr = SUBSXri %42, 0, 0, implicit-def $nzcv
+    CCMPWi %56.sub_32, 0, 4, 1, implicit-def $nzcv, implicit $nzcv
+    %57:gpr64 = COPY $nzcv
+    %60:gpr32 = CSELWr %59, $wzr, 0, implicit $nzcv
+    %61:gpr32 = SBFMWri %105, 0, 15
+    %62:gpr32 = SDIVWr %60, %61
+    %64:gpr32 = MSUBWrrr %62, %61, %60
+    %106:gpr32 = MADDWrrr %106, %64, $wzr
+    STRBBui %106, %33, target-flags(aarch64-pageoff, aarch64-nc) @e :: (store 1 into @e, align 4, !tbaa !0)
+    dead $wzr = SUBSWri %56.sub_32, 0, 0, implicit-def $nzcv
+    %68:gpr64 = CSELXr %42, %56, 0, implicit $nzcv
+    STRXui %68, %107, 0 :: (store 8 into %ir.m.0.i8.i3, !tbaa !3)
+    %70:gpr32common = LDRBBui %69, 0 :: (dereferenceable load 1 from `i1* getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 0)`, align 8)
+    dead $wzr = SUBSWri %70, 0, 0, implicit-def $nzcv
+    %75:gpr64 = CSELXr %73, $xzr, 1, implicit $nzcv
+    %77:gpr32 = ANDWrr %48.sub_32, %68.sub_32
+    STRBBui %77, %46, target-flags(aarch64-pageoff, aarch64-nc) @g :: (store 1 into @g, align 4, !tbaa !0)
+    $nzcv = COPY %57
+    %79:gpr64 = CSELXr %68, %75, 1, implicit $nzcv
+    STRXui %79, %69, 1 :: (store 8 into `i64* getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 2)`, !tbaa !3)
+    B %bb.5
+
+  bb.9.if.end35.i4:
+    CBZW %31, %bb.11
+
+  bb.10:
+    %107:gpr64sp = ADDXri %stack.0.l.addr.i4, 0, 0
+    %106:gpr32 = IMPLICIT_DEF
+    undef %56.sub_32:gpr64common = IMPLICIT_DEF
+    %109:gpr32 = IMPLICIT_DEF
+    B %bb.5
+
+  bb.11.aj.loopexit11.i:
+    %104:gpr32common = ORRWri %104, 0
+    STRHHui %104, %81, target-flags(aarch64-pageoff, aarch64-nc) @i :: (store 2 into @i, align 4, !tbaa !5)
+    B %bb.1
+
+  bb.12.aj.loopexit.i:
+    %105:gpr32 = COPY $wzr
+    %89:gpr64 = SMADDLrrr %83, %88, $xzr
+    %90:gpr64 = UBFMXri %89, 63, 63
+    %92:gpr64 = SBFMXri %89, 34, 63
+    %94:gpr32 = ADDWrr %92.sub_32, %90.sub_32
+    %97:gpr32 = MSUBWrrr %94, %95, %83
+    STRWui %97, %101, 0 :: (store 4 into `i32* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 2)`, !tbaa !7)
+    STURXi %98, %101, 4 :: (store 8 into `i64* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 3)`, !tbaa !3)
+    B %bb.2
+
+  bb.13.while.end.i:
+    %83:gpr32 = LDRWui %82, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals + 4 :: (dereferenceable load 4 from `i32* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 2)`, !tbaa !7)
+    CBZW %83, %bb.16
+    B %bb.14
+
+  bb.14.for.cond19.preheader.i:
+    %85:gpr64 = LDRXui %84, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals + 8 :: (dereferenceable load 8 from `i64* getelementptr inbounds (<{ i8, [3 x i8], i32, i64 }>, <{ i8, [3 x i8], i32, i64 }>* @_MergedGlobals, i32 0, i32 3)`, !tbaa !3)
+    STRBBui %87, %86, target-flags(aarch64-pageoff, aarch64-nc) @_MergedGlobals.1 :: (store 1 into `i1* getelementptr inbounds (<{ i1, [7 x i8], i64 }>, <{ i1, [7 x i8], i64 }>* @_MergedGlobals.1, i32 0, i32 0)`, align 8)
+    %98:gpr64 = ORRXrr %85, %24
+    CBNZX %98, %bb.12
+    B %bb.15
+
+  bb.15.for.cond19.preheader.i:
+    CBNZW $wzr, %bb.12
+    B %bb.16
+
+  bb.16.foo.exit:
+    $w0 = COPY $wzr
+    RET_ReallyLR implicit killed $w0
+
+...