Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16854,6 +16854,30 @@
   return SDValue();
 }
 
+// ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
+static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
+                                         SelectionDAG &DAG) {
+  if (isa<ConstantSDNode>(Z))
+    return SDValue();
+
+  if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
+    return SDValue();
+
+  SDValue SHL = SUB.getOperand(0);
+  if (SHL.getOpcode() != ISD::SHL || !SHL.hasOneUse())
+    return SDValue();
+
+  if (!isa<ConstantSDNode>(SHL.getOperand(1)))
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  SDValue Y = SUB.getOperand(1);
+  SDValue NewSub = DAG.getNode(ISD::SUB, DL, VT, Z, Y);
+  return DAG.getNode(ISD::ADD, DL, VT, NewSub, SHL);
+}
+
 static SDValue performAddCombineForShiftedOperands(SDNode *N,
                                                    SelectionDAG &DAG) {
   // NOTE: Swapping LHS and RHS is not done for SUB, since SUB is not
@@ -16871,6 +16895,11 @@
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
+  if (SDValue Val = performAddCombineSubShift(N, LHS, RHS, DAG))
+    return Val;
+  if (SDValue Val = performAddCombineSubShift(N, RHS, LHS, DAG))
+    return Val;
+
   uint64_t LHSImm = 0, RHSImm = 0;
   // If both operand are shifted by imm and shift amount is not greater than 4
   // for one operand, swap LHS and RHS to put operand with smaller shift amount
Index: llvm/test/CodeGen/AArch64/addsub.ll
===================================================================
--- llvm/test/CodeGen/AArch64/addsub.ll
+++ llvm/test/CodeGen/AArch64/addsub.ll
@@ -697,9 +697,8 @@
 define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, lsl #3
 ; CHECK-NEXT:    ret
   %shl = shl i32 %x, 3
   %sub = sub i32 %shl, %y
@@ -710,9 +709,8 @@
 define i32 @commute_subop0_cadd(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_cadd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w2, w8
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, lsl #3
 ; CHECK-NEXT:    ret
   %shl = shl i32 %x, 3
   %sub = sub i32 %shl, %y
@@ -723,9 +721,8 @@
 define i32 @commute_subop0_mul(i32 %x, i32 %y) {
 ; CHECK-LABEL: commute_subop0_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w0
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    sub w8, w1, w0
+; CHECK-NEXT:    add w0, w8, w0, lsl #3
 ; CHECK-NEXT:    ret
   %mul = mul i32 %x, 7
   %add = add i32 %mul, %y