diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16906,34 +16906,58 @@
   return SDValue();
 }
 
-// ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
-static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
-                                         SelectionDAG &DAG) {
-  auto IsOneUseShiftC = [&](SDValue Shift) {
-    if (!Shift.hasOneUse())
-      return false;
+// Return true if N has the form of an extend or shift operand, i.e. N is:
+//  - a sign/zero/any extend, or SIGN_EXTEND_INREG, from i8, i16 or i32;
+//  - an AND with the constant mask 0xff, 0xffff or 0xffffffff; or
+//  - a SHL, SRL or SRA by a constant amount.
+static bool isExtendOrShiftOperand(SDValue N) {
+  unsigned Opcode = N.getOpcode();
+  if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_INREG ||
+      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND) {
+    EVT SrcVT;
+    if (Opcode == ISD::SIGN_EXTEND_INREG)
+      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
+    else
+      SrcVT = N.getOperand(0).getValueType();
 
-    // TODO: support SRL and SRA also
-    if (Shift.getOpcode() != ISD::SHL)
+    return SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8;
+  }
+
+  if (Opcode == ISD::AND) {
+    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    if (!CSD)
       return false;
+    uint64_t AndMask = CSD->getZExtValue();
+    return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
+  }
+
+  if (Opcode == ISD::SHL || Opcode == ISD::SRL || Opcode == ISD::SRA)
+    return isa<ConstantSDNode>(N.getOperand(1));
 
-    if (!isa<ConstantSDNode>(Shift.getOperand(1)))
-      return false;
-    return true;
+  return false;
+}
+
+// (N - Y) + Z --> (Z - Y) + N
+// when N is a one-use extend or shift operand, so that N can be folded into
+// the final add (e.g. `add x0, x8, w0, sxtw`).
+static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
+                                         SelectionDAG &DAG) {
+  auto IsOneUseExtend = [](SDValue N) {
+    return N.hasOneUse() && isExtendOrShiftOperand(N);
   };
 
-  // DAGCombiner will revert the combination when Z is constant cause
-  // dead loop. So don't enable the combination when Z is constant.
-  // If Z is one use shift C, we also can't do the optimization.
-  // It will falling to self infinite loop.
-  if (isa<ConstantSDNode>(Z) || IsOneUseShiftC(Z))
+  // DAGCombiner will revert the combination when Z is constant, causing an
+  // infinite loop, so don't enable the combination when Z is constant.
+  // If Z is itself a one-use extend or shift operand, we can't do the
+  // optimization either: this combine would loop on itself indefinitely.
+  if (isa<ConstantSDNode>(Z) || IsOneUseExtend(Z))
     return SDValue();
 
   if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
     return SDValue();
 
   SDValue Shift = SUB.getOperand(0);
-  if (!IsOneUseShiftC(Shift))
+  if (!IsOneUseExtend(Shift))
     return SDValue();
 
   SDLoc DL(N);
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -711,9 +711,8 @@
 define i32 @commute_subop0_lshr(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_lshr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, lsr #3
 ; CHECK-NEXT:    ret
   %lshr = lshr i32 %x, 3
   %sub = sub i32 %lshr, %y
@@ -725,9 +724,8 @@
 define i32 @commute_subop0_ashr(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_ashr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    asr w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, asr #3
 ; CHECK-NEXT:    ret
   %ashr = ashr i32 %x, 3
   %sub = sub i32 %ashr, %y
@@ -739,10 +737,8 @@
 define i64 @commute_subop0_sext(i32 %x, i64 %y, i64 %z) {
 ; CHECK-LABEL: commute_subop0_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
-; CHECK-NEXT:    sub x8, x8, x1
-; CHECK-NEXT:    add x0, x8, x2
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    add x0, x8, w0, sxtw
 ; CHECK-NEXT:    ret
   %sext = sext i32 %x to i64
   %sub = sub i64 %sext, %y
@@ -754,9 +750,8 @@
 define i64 @commute_subop0_sext_inreg(i64 %x, i64 %y, i64 %z) {
 ; CHECK-LABEL: commute_subop0_sext_inreg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth x8, w0
-; CHECK-NEXT:    sub x8, x8, x1
-; CHECK-NEXT:    add x0, x8, x2
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    add x0, x8, w0, sxth
 ; CHECK-NEXT:    ret
   %shl = shl i64 %x, 48
   %ashr = ashr i64 %shl, 48
@@ -769,9 +764,8 @@
 define i32 @commute_subop0_zext(i16 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, uxth
 ; CHECK-NEXT:    ret
   %zext = zext i16 %x to i32
   %sub = sub i32 %zext, %y
@@ -785,9 +779,8 @@
 ; CHECK-LABEL: commute_subop0_anyext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #111
-; CHECK-NEXT:    neg w9, w1
+; CHECK-NEXT:    sub w9, w2, w1
 ; CHECK-NEXT:    madd w8, w0, w8, w9
-; CHECK-NEXT:    add w8, w8, w2
 ; CHECK-NEXT:    lsl w8, w8, #3
 ; CHECK-NEXT:    sub w0, w8, #1776
 ; CHECK-NEXT:    ret
@@ -806,9 +799,8 @@
 define i32 @commute_subop0_and(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_and:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, uxtb
 ; CHECK-NEXT:    ret
   %and = and i32 %x, 255
   %sub = sub i32 %and, %y