Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -378,8 +378,23 @@
   // Optimization methods.
   bool tryOptSelect(MachineInstr &MI) const;
+
+  /// Helper function for comparison selection.
+  /// \returns true if \p MI, as an operand of a comparison with condition code
+  /// \p CC, is a CMN.
+  bool isCMN(MachineInstr *MI, const AArch64CC::CondCode &CC,
+             const MachineRegisterInfo &MRI) const;
+
+  /// \returns the MachineOperands and CmpInst::Predicate which should be used
+  /// to emit a compare. If swapping \p LHS and \p RHS would introduce
+  /// profitable folding opportunities, swap them and return an updated
+  /// predicate.
+  std::tuple<MachineOperand &, MachineOperand &, CmpInst::Predicate>
+  trySwapCmpLHSAndRHS(MachineOperand &LHS, MachineOperand &RHS,
+                      CmpInst::Predicate P,
+                      const MachineRegisterInfo &MRI) const;
   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
-                                      MachineOperand &Predicate,
+                                      CmpInst::Predicate P,
                                       MachineIRBuilder &MIRBuilder) const;
   MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
                                                MachineOperand &RHS,
@@ -441,6 +456,14 @@
 {
 }
 
+/// \returns true if \p C is a legal arithmetic immediate.
+static bool isLegalArithImmed(uint64_t C) {
+  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
+  LLVM_DEBUG(dbgs() << "Is imm " << C
+                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
+  return IsLegal;
+}
+
 // FIXME: This should be target-independent, inferred from the types declared
 // for each class in the bank.
 static const TargetRegisterClass *
@@ -3876,6 +3899,93 @@
   return emitInstr(OpcTable[2][Is32Bit], {ZReg}, {LHS, RHS}, MIRBuilder);
 }
 
+/// \returns how profitable it is to fold a comparison's operand's shift and/or
+/// extension operations. This refers to how many instructions may be folded
+/// if \p Reg is made the RHS of a compare.
+static unsigned getCmpOperandFoldingProfit(Register Reg,
+                                           const MachineRegisterInfo &MRI) {
+  if (!MRI.hasOneNonDBGUse(Reg))
+    return 0;
+
+  auto IsSupportedExtend = [&](const MachineInstr &MI) {
+    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
+      return true;
+    if (MI.getOpcode() != TargetOpcode::G_AND)
+      return false;
+    auto ValAndVReg =
+        getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+    if (!ValAndVReg)
+      return false;
+    uint64_t Mask = ValAndVReg->Value;
+    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
+  };
+
+  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+  if (IsSupportedExtend(*Def))
+    return 1;
+
+  unsigned Opc = Def->getOpcode();
+  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
+      Opc != TargetOpcode::G_LSHR)
+    return 0;
+
+  auto ValAndVReg =
+      getConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
+  if (!ValAndVReg)
+    return 0;
+  uint64_t Shift = ValAndVReg->Value;
+  MachineInstr *ShiftLHS =
+      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
+  if (IsSupportedExtend(*ShiftLHS))
+    return (Shift <= 4) ? 2 : 1;
+  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
+  if (Ty.isVector())
+    return 0;
+  unsigned ShiftSize = Ty.getSizeInBits();
+  if ((ShiftSize == 32 && Shift <= 31) || (ShiftSize == 64 && Shift <= 63))
+    return 1;
+  return 0;
+}
+
+std::tuple<MachineOperand &, MachineOperand &, CmpInst::Predicate>
+AArch64InstructionSelector::trySwapCmpLHSAndRHS(
+    MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate P,
+    const MachineRegisterInfo &MRI) const {
+  // Swap the operands if it would introduce a profitable folding opportunity.
+  // (e.g. a shift + extend).
+  //
+  // For example:
+  //   lsl w13, w11, #1
+  //   cmp w13, w12
+  // can be turned into:
+  //   cmp w12, w11, lsl #1
+
+  // Don't swap if there's a constant on the RHS, because we know we can fold
+  // that.
+  Register RHSReg = RHS.getReg();
+  auto RHSCst = getConstantVRegValWithLookThrough(RHSReg, MRI);
+  if (RHSCst && isLegalArithImmed(RHSCst->Value))
+    return {LHS, RHS, P};
+  auto CC = changeICMPPredToAArch64CC(P);
+  auto GetRegForProfit = [&](Register Reg) {
+    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+    return isCMN(Def, CC, MRI) ? Def->getOperand(2).getReg() : Reg;
+  };
+
+  // Don't have a constant on the RHS. Would swapping the LHS and RHS introduce
+  // an opportunity to fold a constant?
+  Register TheLHS = GetRegForProfit(LHS.getReg());
+  Register TheRHS = GetRegForProfit(RHS.getReg());
+
+  // If the LHS is more likely to give us a folding opportunity, then swap the
+  // LHS and RHS.
+  if (getCmpOperandFoldingProfit(TheLHS, MRI) >
+      getCmpOperandFoldingProfit(TheRHS, MRI))
+    return {RHS, LHS, CmpInst::getSwappedPredicate(P)};
+
+  return {LHS, RHS, P};
+}
+
 std::pair<MachineInstr *, CmpInst::Predicate>
 AArch64InstructionSelector::emitIntegerCompare(
     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
@@ -3884,11 +3994,15 @@
   assert(Predicate.isPredicate() && "Expected predicate?");
   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
+  auto MaybeSwappedCmp = trySwapCmpLHSAndRHS(
+      LHS, RHS, static_cast<CmpInst::Predicate>(Predicate.getPredicate()), MRI);
+  MachineOperand &CmpLHS = std::get<0>(MaybeSwappedCmp);
+  MachineOperand &CmpRHS = std::get<1>(MaybeSwappedCmp);
+  CmpInst::Predicate P = std::get<2>(MaybeSwappedCmp);
 
   // Fold the compare if possible.
   MachineInstr *FoldCmp =
-      tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
+      tryFoldIntegerCompare(CmpLHS, CmpRHS, P, MIRBuilder);
   if (FoldCmp)
     return {FoldCmp, P};
 
@@ -3896,7 +4010,7 @@
   unsigned CmpOpc = 0;
   Register ZReg;
 
-  LLT CmpTy = MRI.getType(LHS.getReg());
+  LLT CmpTy = MRI.getType(CmpLHS.getReg());
   assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
          "Expected scalar or pointer");
   if (CmpTy == LLT::scalar(32)) {
@@ -3911,18 +4025,19 @@
 
   // Try to match immediate forms.
   MachineInstr *ImmedCmp =
-      tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
+      tryOptArithImmedIntegerCompare(CmpLHS, CmpRHS, P, MIRBuilder);
   if (ImmedCmp)
     return {ImmedCmp, P};
 
   // If we don't have an immediate, we may have a shift which can be folded
   // into the compare.
-  MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
+  MachineInstr *ShiftedCmp =
+      tryOptArithShiftedCompare(CmpLHS, CmpRHS, MIRBuilder);
   if (ShiftedCmp)
     return {ShiftedCmp, P};
 
   auto CmpMI =
-      MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
+      MIRBuilder.buildInstr(CmpOpc, {ZReg}, {CmpLHS.getReg(), CmpRHS.getReg()});
   // Make sure that we can constrain the compare that we emitted.
constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); return {&*CmpMI, P}; @@ -4203,11 +4318,25 @@ return true; } +bool AArch64InstructionSelector::isCMN(MachineInstr *MI, + const AArch64CC::CondCode &CC, + const MachineRegisterInfo &MRI) const { + if (!MI || MI->getOpcode() != TargetOpcode::G_SUB) + return false; + // Need to make sure NZCV is the same at the end of the transformation. + if (CC != AArch64CC::EQ && CC != AArch64CC::NE) + return false; + + // Match: x = G_SUB 0, y + auto ValAndVReg = + getConstantVRegValWithLookThrough(MI->getOperand(1).getReg(), MRI); + return ValAndVReg && ValAndVReg->Value == 0; +} + MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( - MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, + MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate P, MachineIRBuilder &MIRBuilder) const { - assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && - "Unexpected MachineOperand"); + assert(LHS.isReg() && RHS.isReg() && "Unexpected MachineOperand"); MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); // We want to find this sort of thing: // x = G_SUB 0, y @@ -4218,35 +4347,9 @@ // // cmn z, y - // Helper lambda to detect the subtract followed by the compare. - // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0. - auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) { - if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB) - return false; - - // Need to make sure NZCV is the same at the end of the transformation. - if (CC != AArch64CC::EQ && CC != AArch64CC::NE) - return false; - - // We want to match against SUBs. - if (DefMI->getOpcode() != TargetOpcode::G_SUB) - return false; - - // Make sure that we're getting - // x = G_SUB 0, y - auto ValAndVReg = - getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI); - if (!ValAndVReg || ValAndVReg->Value != 0) - return false; - - // This can safely be represented as a CMN. - return true; - }; - // Check if the RHS or LHS of the G_ICMP is defined by a SUB MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); - CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P); // Given this: @@ -4257,7 +4360,7 @@ // Produce this: // // cmn y, z - if (IsCMN(LHSDef, CC)) + if (isCMN(LHSDef, CC, MRI)) return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); // Same idea here, but with the RHS of the compare instead: @@ -4270,7 +4373,7 @@ // Produce this: // // cmn z, y - if (IsCMN(RHSDef, CC)) + if (isCMN(RHSDef, CC, MRI)) return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); // Given this: Index: llvm/test/CodeGen/AArch64/GlobalISel/select-swap-compare-operands.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/select-swap-compare-operands.mir @@ -0,0 +1,571 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Check that we swap the order of operands on comparisons when it is likely +# to introduce a folding opportunity. +# +# The condition code for the compare should be changed when appropriate. + +... 
+--- +name: swap_sextinreg_lhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: swap_sextinreg_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64 = SBFMXri %reg, 0, 0 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_SEXT_INREG %reg, 1 + %cmp_rhs:gpr(s64) = COPY $x1 + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_more_than_one_use +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; The LHS of the compare is used in an add, and a second compare. Don't + ; swap, since we don't gain any folding opportunities here. + + ; CHECK-LABEL: name: dont_swap_more_than_one_use + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg0:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64 = SBFMXri %reg0, 0, 0 + ; CHECK: %add:gpr64 = ADDXrr %cmp_lhs, %reg0 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %add, implicit-def $nzcv + ; CHECK: %cmp2:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: $w0 = COPY %cmp2 + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_SEXT_INREG %reg0, 1 + %reg1:gpr(s64) = COPY $x1 + %cmp1:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %reg1 + + %add:gpr(s64) = G_ADD %cmp_lhs(s64), %reg0 + %cmp2:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %add + + $w0 = COPY %cmp2(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_legal_arith_immed_on_rhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; Arithmetic immediates can be folded into compares. If we have one, then + ; don't bother changing anything. + + ; CHECK-LABEL: name: dont_swap_legal_arith_immed_on_rhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = SBFMXri %reg, 0, 0 + ; CHECK: $xzr = SUBSXri %cmp_lhs, 12, 0, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_SEXT_INREG %reg, 1 + %cmp_rhs:gpr(s64) = G_CONSTANT i64 12 + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_non_arith_immed_on_rhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; If we have a non-arithmetic immediate on the rhs, then we can swap to get + ; a guaranteed folding opportunity. 
+ + ; CHECK-LABEL: name: swap_non_arith_immed_on_rhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64 = SBFMXri %reg, 0, 0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1234567 + ; CHECK: %cmp_rhs:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_SEXT_INREG %reg, 1 + %cmp_rhs:gpr(s64) = G_CONSTANT i64 1234567 + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_and_lhs_0xFF +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: swap_and_lhs_0xFF + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %and_lhs:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4103 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %and_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 255 + %cmp_lhs:gpr(s64) = G_AND %and_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_and_lhs_0xFFFF +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: swap_and_lhs_0xFFFF + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %and_lhs:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4111 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %cst:gpr(s64) = G_CONSTANT i64 65535 + %and_lhs:gpr(s64) = COPY $x0 + %cmp_lhs:gpr(s64) = G_AND %and_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_and_lhs_0xFFFFFFFF +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; CHECK-LABEL: name: swap_and_lhs_0xFFFFFFFF + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %and_lhs:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4127 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_rhs, %cmp_lhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %and_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 4294967295 + %cmp_lhs:gpr(s64) = G_AND %and_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_and_lhs_wrong_mask +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; 7 isn't an extend mask for G_AND, so there's no folding opportunities + ; here. 
+ ; + ; CHECK-LABEL: name: dont_swap_and_lhs_wrong_mask + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %and_lhs:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4098 + ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %and_lhs:gpr(s64) = COPY $x0 + %not_an_extend_mask:gpr(s64) = G_CONSTANT i64 7 + %cmp_lhs:gpr(s64) = G_AND %and_lhs, %not_an_extend_mask(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_shl_lhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: swap_shl_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %shl_lhs:gpr64 = COPY $x0 + ; CHECK: $xzr = SUBSXrs %cmp_rhs, %shl_lhs, 1, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %shl_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_SHL %shl_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_ashr_lhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: swap_ashr_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %ashr_lhs:gpr64 = COPY $x0 + ; CHECK: $xzr = SUBSXrs %cmp_rhs, %ashr_lhs, 129, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %ashr_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_ASHR %ashr_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: swap_lshr_lhs +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: swap_lshr_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %lshr_lhs:gpr64 = COPY $x0 + ; CHECK: $xzr = SUBSXrs %cmp_rhs, %lshr_lhs, 65, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %lshr_lhs:gpr(s64) = COPY $x0 + %cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_LSHR %lshr_lhs, %cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_shift_s64_cst_too_large +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; Constant for the shift must be <= 63. 
+
+    ; CHECK-LABEL: name: dont_swap_shift_s64_cst_too_large
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %cmp_rhs:gpr64 = COPY $x1
+    ; CHECK: %shl_lhs:gpr64 = COPY $x0
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 64
+    ; CHECK: %too_large:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+    ; CHECK: %cmp_lhs:gpr64 = LSLVXr %shl_lhs, %too_large
+    ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %cmp_rhs:gpr(s64) = COPY $x1
+
+    %shl_lhs:gpr(s64) = COPY $x0
+    %too_large:gpr(s64) = G_CONSTANT i64 64
+    %cmp_lhs:gpr(s64) = G_SHL %shl_lhs, %too_large(s64)
+
+    %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+
+...
+---
+name: dont_swap_shift_s32_cst_too_large
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; Constant for the shift must be <= 31.
+
+    ; CHECK-LABEL: name: dont_swap_shift_s32_cst_too_large
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %cmp_rhs:gpr32 = COPY $w1
+    ; CHECK: %shl_lhs:gpr32 = COPY $w0
+    ; CHECK: %cst:gpr32 = MOVi32imm 32
+    ; CHECK: %cmp_lhs:gpr32 = LSLVWr %shl_lhs, %cst
+    ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %cmp_rhs:gpr(s32) = COPY $w1
+
+    %shl_lhs:gpr(s32) = COPY $w0
+    %cst:gpr(s32) = G_CONSTANT i32 32
+    %cmp_lhs:gpr(s32) = G_SHL %shl_lhs, %cst(s32)
+
+    %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s32), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_swap_cmn_lhs_no_folding_opportunities
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; No reason to swap a CMN on the LHS when it won't introduce a constant
+    ; folding opportunity. We can recognise CMNs on the LHS and RHS, so there's
+    ; nothing to gain here.
+
+    ; CHECK-LABEL: name: dont_swap_cmn_lhs_no_folding_opportunities
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %cmp_rhs:gpr64 = COPY $x1
+    ; CHECK: %sub_rhs:gpr64 = COPY $x0
+    ; CHECK: $xzr = ADDSXrr %sub_rhs, %cmp_rhs, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %cmp_rhs:gpr(s64) = COPY $x1
+
+    %sub_rhs:gpr(s64) = COPY $x0
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %cmp_lhs:gpr(s64) = G_SUB %zero, %sub_rhs
+
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: swap_cmn_lhs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; Swap when we can see a constant folding opportunity through the sub on
+    ; the LHS.
+ + + ; CHECK-LABEL: name: swap_cmn_lhs + ; CHECK: liveins: $x0, $x1 + ; CHECK: %cmp_rhs:gpr64 = COPY $x1 + ; CHECK: %shl_lhs:gpr64 = COPY $x0 + ; CHECK: $xzr = ADDSXrs %cmp_rhs, %shl_lhs, 63, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %cmp_rhs:gpr(s64) = COPY $x1 + + %shl_lhs:gpr(s64) = COPY $x0 + %zero:gpr(s64) = G_CONSTANT i64 0 + %cst:gpr(s64) = G_CONSTANT i64 63 + %sub_rhs:gpr(s64) = G_SHL %shl_lhs, %cst(s64) + %cmp_lhs:gpr(s64) = G_SUB %zero, %sub_rhs + + %cmp:gpr(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_cmn_lhs_when_rhs_more_profitable +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + ; Don't swap when the RHS's subtract offers a better constant folding + ; opportunity than the LHS's subtract. + ; + ; In this case, the RHS has a supported extend, plus a shift with a constant + ; <= 4. + + ; CHECK-LABEL: name: dont_swap_cmn_lhs_when_rhs_more_profitable + ; CHECK: liveins: $x0, $x1 + ; CHECK: %zero:gpr64 = COPY $xzr + ; CHECK: %reg0:gpr64 = COPY $x0 + ; CHECK: %shl:gpr64 = UBFMXri %reg0, 1, 0 + ; CHECK: %reg1:gpr64 = COPY $x1 + ; CHECK: %sext_in_reg:gpr64 = SBFMXri %reg1, 0, 0 + ; CHECK: %cmp_rhs:gpr64 = SUBSXrs %zero, %sext_in_reg, 131, implicit-def $nzcv + ; CHECK: $xzr = ADDSXrr %shl, %cmp_rhs, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %zero:gpr(s64) = G_CONSTANT i64 0 + + %reg0:gpr(s64) = COPY $x0 + %shl_cst:gpr(s64) = G_CONSTANT i64 63 + %shl:gpr(s64) = G_SHL %reg0, %shl_cst(s64) + %cmp_lhs:gpr(s64) = G_SUB %zero, %shl + + %reg1:gpr(s64) = COPY $x1 + %sext_in_reg:gpr(s64) = G_SEXT_INREG %reg1, 1 + %ashr_cst:gpr(s64) = G_CONSTANT i64 3 + %ashr:gpr(s64) = G_ASHR %sext_in_reg, %ashr_cst(s64) + %cmp_rhs:gpr(s64) = G_SUB %zero, %ashr + + %cmp:gpr(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_swap_rhs_with_supported_extend +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + ; The RHS offers more constant folding opportunities than the LHS. + + ; CHECK-LABEL: name: dont_swap_rhs_with_supported_extend + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg0:gpr64 = COPY $x0 + ; CHECK: %cmp_lhs:gpr64 = UBFMXri %reg0, 63, 62 + ; CHECK: %reg1:gpr64 = COPY $x1 + ; CHECK: %and:gpr64common = ANDXri %reg1, 4103 + ; CHECK: $xzr = SUBSXrs %cmp_lhs, %and, 129, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s64) = COPY $x0 + %lhs_cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_SHL %reg0, %lhs_cst(s64) + + %reg1:gpr(s64) = COPY $x1 + %and_mask:gpr(s64) = G_CONSTANT i64 255 + %and:gpr(s64) = G_AND %reg1, %and_mask(s64) + %rhs_cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_rhs:gpr(s64) = G_ASHR %and, %rhs_cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 + + +... +--- +name: swap_rhs_with_supported_extend +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; In this case, both the LHS and RHS are fed by a supported extend. However, + ; the LHS' shift has a constant <= 4. 
This makes it more profitable, so + ; we should swap the operands. + + ; CHECK-LABEL: name: swap_rhs_with_supported_extend + ; CHECK: liveins: $x0, $x1 + ; CHECK: %reg0:gpr64 = COPY $x0 + ; CHECK: %and:gpr64common = ANDXri %reg0, 4103 + ; CHECK: %cmp_rhs:gpr64 = SBFMXri %and, 5, 63 + ; CHECK: $xzr = SUBSXrs %cmp_rhs, %and, 1, implicit-def $nzcv + ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv + ; CHECK: $w0 = COPY %cmp + ; CHECK: RET_ReallyLR implicit $w0 + %reg0:gpr(s64) = COPY $x0 + %and_mask:gpr(s64) = G_CONSTANT i64 255 + %and:gpr(s64) = G_AND %reg0, %and_mask(s64) + + %lhs_cst:gpr(s64) = G_CONSTANT i64 1 + %cmp_lhs:gpr(s64) = G_SHL %and, %lhs_cst(s64) + + %rhs_cst:gpr(s64) = G_CONSTANT i64 5 + %cmp_rhs:gpr(s64) = G_ASHR %and, %rhs_cst(s64) + + %cmp:gpr(s32) = G_ICMP intpred(sgt), %cmp_lhs(s64), %cmp_rhs + $w0 = COPY %cmp(s32) + RET_ReallyLR implicit $w0 +
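
For readers who want to experiment with the heuristic outside of LLVM, below is a minimal standalone C++ sketch of the scoring and swap decision implemented by getCmpOperandFoldingProfit and trySwapCmpLHSAndRHS in the patch above. Only isLegalArithImmed mirrors the patch directly; OperandProfile, foldingProfit, and shouldSwap are hypothetical names that stand in for walking the MachineRegisterInfo, and the single-use and shift-range checks are deliberately omitted.

// Standalone sketch of the swap heuristic (not LLVM code).
// Build with: clang++ -std=c++17 swap_heuristic.cpp
#include <cstdint>
#include <iostream>
#include <optional>

// Mirrors isLegalArithImmed from the patch: a 12-bit immediate, optionally
// shifted left by 12 bits.
static bool isLegalArithImmed(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

// Hypothetical summary of how a compare operand is defined. In the selector
// this information comes from G_SEXT_INREG/G_AND/G_SHL/G_ASHR/G_LSHR defs.
struct OperandProfile {
  bool HasSupportedExtend = false;     // sext_inreg, or AND with 0xFF/0xFFFF/0xFFFFFFFF
  std::optional<uint64_t> ShiftAmount; // constant shift feeding the compare, if any
  bool ShiftOfExtend = false;          // the shifted value is itself a supported extend
  std::optional<uint64_t> Constant;    // operand is a known constant, if any
};

// Same scoring idea as getCmpOperandFoldingProfit: 1 for a lone extend,
// 2 for a small (<= 4) shift of an extend, otherwise 1 for any other
// foldable shift, 0 when nothing can be folded.
static unsigned foldingProfit(const OperandProfile &Op) {
  if (Op.HasSupportedExtend && !Op.ShiftAmount)
    return 1;
  if (Op.ShiftAmount) {
    if (Op.ShiftOfExtend)
      return *Op.ShiftAmount <= 4 ? 2 : 1;
    return 1;
  }
  return 0;
}

// Mirrors the decision in trySwapCmpLHSAndRHS: never swap away a legal
// arithmetic immediate on the RHS; otherwise swap when the LHS folds better.
static bool shouldSwap(const OperandProfile &LHS, const OperandProfile &RHS) {
  if (RHS.Constant && isLegalArithImmed(*RHS.Constant))
    return false;
  return foldingProfit(LHS) > foldingProfit(RHS);
}

int main() {
  OperandProfile ShiftedLHS;  // e.g. %cmp_lhs = G_SHL %x, 1
  ShiftedLHS.ShiftAmount = 1;
  OperandProfile PlainRHS;    // e.g. %cmp_rhs = COPY $x1
  OperandProfile SmallImmRHS; // e.g. %cmp_rhs = G_CONSTANT i64 12
  SmallImmRHS.Constant = 12;

  std::cout << shouldSwap(ShiftedLHS, PlainRHS) << '\n';    // 1: swap, fold the shift
  std::cout << shouldSwap(ShiftedLHS, SmallImmRHS) << '\n'; // 0: keep the legal immediate
  return 0;
}

Running the sketch prints 1 for the shifted-LHS case (swap so the shift can fold into the compare, as in the swap_shl_lhs test) and 0 when the RHS is already a legal arithmetic immediate (as in dont_swap_legal_arith_immed_on_rhs).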