Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16906,34 +16906,54 @@
   return SDValue();
 }
 
-// ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
-static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
-                                         SelectionDAG &DAG) {
-  auto IsOneUseShiftC = [&](SDValue Shift) {
-    if (!Shift.hasOneUse())
-      return false;
+/// Check whether a node can be folded as an extend or a constant shift.
+static bool canbeExtend(SDValue N) {
+  unsigned Opcode = N.getOpcode();
+  if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_INREG ||
+      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND) {
+    EVT SrcVT;
+    if (Opcode == ISD::SIGN_EXTEND_INREG)
+      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
+    else
+      SrcVT = N.getOperand(0).getValueType();
 
-    // TODO: support SRL and SRA also
-    if (Shift.getOpcode() != ISD::SHL)
+    return SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8;
+  } else if (Opcode == ISD::AND) {
+    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    if (!CSD)
       return false;
+    uint64_t AndMask = CSD->getZExtValue();
+    return AndMask == 0xff || AndMask == 0xffff || AndMask == 0xffffffff;
+  } else if (Opcode == ISD::SHL || Opcode == ISD::SRL || Opcode == ISD::SRA) {
+    // Do we need to consider overflow here?
+    return isa<ConstantSDNode>(N.getOperand(1));
+  }
 
-    if (!isa<ConstantSDNode>(Shift.getOperand(1)))
+  return false;
+}
+
+// (N - Y) + Z --> (Z - Y) + N
+// when N can be folded as an extend or a constant shift and Z cannot.
+static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
+                                         SelectionDAG &DAG) {
+  auto IsOneUseExtend = [&](SDValue N) {
+    if (!N.hasOneUse())
       return false;
-    return true;
+    return canbeExtend(N);
   };
 
   // DAGCombiner will revert the combination when Z is constant cause
   // dead loop. So don't enable the combination when Z is constant.
   // If Z is one use shift C, we also can't do the optimization.
   // It will falling to self infinite loop.
-  if (isa<ConstantSDNode>(Z) || IsOneUseShiftC(Z))
+  if (isa<ConstantSDNode>(Z) || IsOneUseExtend(Z))
     return SDValue();
 
   if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
     return SDValue();
 
   SDValue Shift = SUB.getOperand(0);
-  if (!IsOneUseShiftC(Shift))
+  if (!IsOneUseExtend(Shift))
     return SDValue();
 
   SDLoc DL(N);
Index: llvm/test/CodeGen/AArch64/addsub.ll
===================================================================
--- llvm/test/CodeGen/AArch64/addsub.ll
+++ llvm/test/CodeGen/AArch64/addsub.ll
@@ -694,7 +694,7 @@
   ret i32 undef
 }
 
-; ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
+; ((X << C) - Y) + Z --> (Z - Y) + (X << C)
 define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0:
 ; CHECK:       // %bb.0:
@@ -707,13 +707,12 @@
   ret i32 %add
 }
 
-; ((X << C) - Y) + Z --> (Z - Y) + (X << C)
+; ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
 define i32 @commute_subop0_lshr(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_lshr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, lsr #3
 ; CHECK-NEXT:    ret
   %lshr = lshr i32 %x, 3
   %sub = sub i32 %lshr, %y
@@ -721,13 +720,12 @@
   ret i32 %add
 }
 
-; ((X << C) - Y) + Z --> (Z - Y) + (X << C)
+; ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
 define i32 @commute_subop0_ashr(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_ashr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    asr w8, w0, #3
-; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    add w0, w8, w2
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, asr #3
 ; CHECK-NEXT:    ret
   %ashr = ashr i32 %x, 3
   %sub = sub i32 %ashr, %y
@@ -735,7 +733,47 @@
   ret i32 %add
 }
 
-; Z + ((X >> C) - Y) --> (Z - Y) + (X >> C)
+; ((sext X) - Y) + Z --> (Z - Y) + (sext X)
+define i64 @commute_subop0_sext(i32 %x, i64 %y, i64 %z) {
+; CHECK-LABEL: commute_subop0_sext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x2, x1
+; CHECK-NEXT:    add x0, x8, w0, sxtw
+; CHECK-NEXT:    ret
+  %sext = sext i32 %x to i64
+  %sub = sub i64 %sext, %y
+  %add = add i64 %sub, %z
+  ret i64 %add
+}
+
+; ((zext X) - Y) + Z --> (Z - Y) + (zext X)
+define i32 @commute_subop0_zext(i16 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: commute_subop0_zext:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, uxth
+; CHECK-NEXT:    ret
+  %zext = zext i16 %x to i32
+  %sub = sub i32 %zext, %y
+  %add = add i32 %sub, %z
+  ret i32 %add
+}
+
+; ((X and C) - Y) + Z --> (Z - Y) + (X and C)
+define i32 @commute_subop0_and(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: commute_subop0_and:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w2, w1
+; CHECK-NEXT:    add w0, w8, w0, uxtb
+; CHECK-NEXT:    ret
+  %and = and i32 %x, 255
+  %sub = sub i32 %and, %y
+  %add = add i32 %sub, %z
+  ret i32 %add
+}
+
+
+; Z + ((X << C) - Y) --> (Z - Y) + (X << C)
 define i32 @commute_subop0_cadd(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_cadd:
 ; CHECK:       // %bb.0:
@@ -748,7 +786,7 @@
   ret i32 %add
 }
 
-; Y + ((X >> C) - X) --> (Y - X) + (X >> C)
+; Y + ((X << C) - X) --> (Y - X) + (X << C)
 define i32 @commute_subop0_mul(i32 %x, i32 %y) {
 ; CHECK-LABEL: commute_subop0_mul:
 ; CHECK:       // %bb.0:
@@ -760,7 +798,7 @@
   ret i32 %add
 }
 
-; negative case for ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
+; negative case for ((X << C) - Y) + Z --> (Z - Y) + (X << C)
 ; Y can't be constant to avoid dead loop
 define i32 @commute_subop0_zconst(i32 %x, i32 %y) {
 ; CHECK-LABEL: commute_subop0_zconst:
@@ -775,7 +813,7 @@
   ret i32 %add
 }
 
-; negative case for ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
+; negative case for ((X << C) - Y) + Z --> (Z - Y) + (X << C)
 ; Y can't be shift C also to avoid dead loop
 define i32 @commute_subop0_zshiftc_oneuse(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: commute_subop0_zshiftc_oneuse:
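
For reference, a minimal source-level sketch of the pattern the extended combine targets (the function name is hypothetical and not part of the patch). Built for an AArch64 target, the expression ((zext X) - Y) + Z is expected to be reassociated to (Z - Y) + (zext X), letting the zero-extend fold into an extended-register add, as checked by the new commute_subop0_zext test.

#include <cstdint>

// Hypothetical example of the ((zext X) - Y) + Z pattern. After the
// combine, the zero-extend of x should fold into the add's operand
// (e.g. "sub w8, w2, w1" followed by "add w0, w8, w0, uxth").
uint32_t commute_zext_example(uint16_t x, uint32_t y, uint32_t z) {
  return (static_cast<uint32_t>(x) - y) + z;
}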