Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16906,34 +16906,51 @@
   return SDValue();
 }
 
-// ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
-static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
-                                         SelectionDAG &DAG) {
-  auto IsOneUseShiftC = [&](SDValue Shift) {
-    if (!Shift.hasOneUse())
-      return false;
+// Check if a node is an extend or shift operand
+static bool isExtendOrShiftOperand(SDValue N) {
+  unsigned Opcode = N.getOpcode();
+  if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_INREG ||
+      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND) {
+    EVT SrcVT;
+    if (Opcode == ISD::SIGN_EXTEND_INREG)
+      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
+    else
+      SrcVT = N.getOperand(0).getValueType();
 
-    // TODO: support SRL and SRA also
-    if (Shift.getOpcode() != ISD::SHL)
+    return SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8;
+  } else if (Opcode == ISD::AND) {
+    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    if (!CSD)
       return false;
+    uint64_t AndMask = CSD->getZExtValue();
+    return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
+  } else if (Opcode == ISD::SHL || Opcode == ISD::SRL || Opcode == ISD::SRA) {
+    return isa<ConstantSDNode>(N.getOperand(1));
+  }
 
-    if (!isa<ConstantSDNode>(Shift.getOperand(1)))
-      return false;
-    return true;
+  return false;
+}
+
+// (N - Y) + Z --> (Z - Y) + N
+// when N is an extend or shift operand
+static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
+                                         SelectionDAG &DAG) {
+  auto IsOneUseExtend = [](SDValue N) {
+    return N.hasOneUse() && isExtendOrShiftOperand(N);
   };
 
   // DAGCombiner will revert the combination when Z is constant cause
   // dead loop. So don't enable the combination when Z is constant.
   // If Z is one use shift C, we also can't do the optimization.
   // It will falling to self infinite loop.
-  if (isa<ConstantSDNode>(Z) || IsOneUseShiftC(Z))
+  if (isa<ConstantSDNode>(Z) || IsOneUseExtend(Z))
     return SDValue();
 
   if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
     return SDValue();
 
   SDValue Shift = SUB.getOperand(0);
-  if (!IsOneUseShiftC(Shift))
+  if (!IsOneUseExtend(Shift))
     return SDValue();
 
   SDLoc DL(N);
Index: llvm/test/CodeGen/AArch64/addsub.ll
===================================================================
--- llvm/test/CodeGen/AArch64/addsub.ll
+++ llvm/test/CodeGen/AArch64/addsub.ll
@@ -711,9 +711,8 @@
 define i32 @commute_subop0_lshr(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_lshr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, lsr #3
 ; CHECK-NEXT:    ret
   %lshr = lshr i32 %x, 3
   %sub = sub i32 %lshr, %y
@@ -725,9 +724,8 @@
 define i32 @commute_subop0_ashr(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_ashr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    asr w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, asr #3
 ; CHECK-NEXT:    ret
   %ashr = ashr i32 %x, 3
   %sub = sub i32 %ashr, %y
@@ -739,10 +737,8 @@
 define i64 @commute_subop0_sext(i32 %x, i64 %y, i64 %z) {
 ; CHECK-LABEL: commute_subop0_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    sxtw x8, w0
-; CHECK-NEXT:    sub x8, x8, x1
-; CHECK-NEXT:    add x0, x8, x2
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    add x0, x8, w0, sxtw
 ; CHECK-NEXT:    ret
   %sext = sext i32 %x to i64
   %sub = sub i64 %sext, %y
@@ -754,9 +750,8 @@
 define i64 @commute_subop0_sext_inreg(i64 %x, i64 %y, i64 %z) {
 ; CHECK-LABEL: commute_subop0_sext_inreg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sxth x8, w0
-; CHECK-NEXT:    sub x8, x8, x1
-; CHECK-NEXT:    add x0, x8, x2
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    add x0, x8, w0, sxth
 ; CHECK-NEXT:    ret
   %shl = shl i64 %x, 48
   %ashr = ashr i64 %shl, 48
@@ -769,9 +764,8 @@
 define i32 @commute_subop0_zext(i16 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, uxth
 ; CHECK-NEXT:    ret
   %zext = zext i16 %x to i32
   %sub = sub i32 %zext, %y
@@ -786,10 +780,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #222
 ; CHECK-NEXT:    mov w9, #111
-; CHECK-NEXT:    mul w8, w1, w8
-; CHECK-NEXT:    neg w8, w8
+; CHECK-NEXT:    msub w8, w1, w8, w2
 ; CHECK-NEXT:    madd w8, w0, w9, w8
-; CHECK-NEXT:    add w8, w8, w2
 ; CHECK-NEXT:    add w0, w8, w8, lsl #5
 ; CHECK-NEXT:    ret
   %aa = mul i16 %a, 111
@@ -807,9 +799,8 @@
 define i32 @commute_subop0_and(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_and:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, uxtb
 ; CHECK-NEXT:    ret
   %and = and i32 %x, 255
   %sub = sub i32 %and, %y
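
Note (not part of the patch): AArch64 add/sub instructions can take a shifted or sign/zero-extended register as their second operand, so reassociating ((ext/shift X) - Y) + Z into (Z - Y) + (ext/shift X) lets instruction selection fold the extend or shift into the final add, as the updated CHECK lines show. A minimal C++ sketch of source that produces the lshr pattern, mirroring the commute_subop0_lshr test above (the function name and the exact codegen shown are illustrative):

    // ((x >> 3) - y) + z: the logical shift feeds a sub whose result feeds an add.
    // With the reassociation above, expected AArch64 codegen is roughly
    //   sub w8, w2, w1
    //   add w0, w8, w0, lsr #3
    // rather than a separate lsr followed by sub and add.
    unsigned commute_subop0_lshr_src(unsigned x, unsigned y, unsigned z) {
      return ((x >> 3) - y) + z;
    }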