Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12871,11 +12871,55 @@
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
+  // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y so that the
+  // MachineCombiner pass can later combine the resulting add+mul into madd.
+  // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue Op0;
+  SDValue Op1;
+
+  if (!isa<ConstantSDNode>(N0) && !isa<ConstantSDNode>(N1)) {
+    if (N0->getOpcode() == ISD::ADD || N0->getOpcode() == ISD::SUB) {
+      Op0 = N0;
+      Op1 = N1;
+    } else if (N1->getOpcode() == ISD::ADD || N1->getOpcode() == ISD::SUB) {
+      Op0 = N1;
+      Op1 = N0;
+    }
+
+    if (!Op0 || !Op0->hasOneUse())
+      return SDValue();
+
+    if (Op0->getOpcode() == ISD::ADD &&
+        isa<ConstantSDNode>(Op0->getOperand(1))) {
+      ConstantSDNode *C = cast<ConstantSDNode>(Op0->getOperand(1));
+      if (C && C->isOne()) {
+        SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, Op1, Op0->getOperand(0));
+        SDValue Res = DAG.getNode(ISD::ADD, DL, VT, MulVal, Op1);
+        return Res;
+      }
+    }
+
+    if (Op0->getOpcode() == ISD::SUB &&
+        isa<ConstantSDNode>(Op0->getOperand(0))) {
+      ConstantSDNode *C = cast<ConstantSDNode>(Op0->getOperand(0));
+      if (C && C->isOne()) {
+        SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, Op1, Op0->getOperand(1));
+        SDValue Res = DAG.getNode(ISD::SUB, DL, VT, Op1, MulVal);
+        return Res;
+      }
+    }
+
+    return SDValue();
+  }
+
   // The below optimizations require a constant RHS.
   if (!isa<ConstantSDNode>(N->getOperand(1)))
     return SDValue();
 
-  SDValue N0 = N->getOperand(0);
   ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
   const APInt &ConstValue = C->getAPIntValue();
 
@@ -12954,8 +12998,6 @@
     return SDValue();
   }
 
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
   SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
                                    DAG.getConstant(ShiftAmt, DL, MVT::i64));
 
Index: llvm/test/CodeGen/AArch64/madd-combiner.ll
===================================================================
--- llvm/test/CodeGen/AArch64/madd-combiner.ll
+++ llvm/test/CodeGen/AArch64/madd-combiner.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MADD-MSUB
 
 ; Test that we use the correct register class.
 define i32 @mul_add_imm(i32 %a, i32 %b) {
@@ -35,3 +36,50 @@
   ret void
 }
 
+define i32 @add1_mul_val1(i32 %a, i32 %b) {
+; CHECK-LABEL: add1_mul_val1
+; CHECK-MADD-MSUB: madd w0, w1, w0, w1
+  %1 = add i32 %a, 1
+  %2 = mul i32 %1, %b
+  ret i32 %2
+}
+
+define i32 @add1_mul_val2(i32 %a, i32 %b) {
+; CHECK-LABEL: add1_mul_val2
+; CHECK-MADD-MSUB: madd w0, w0, w1, w0
+  %1 = add i32 %b, 1
+  %2 = mul i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @add1_mul_val3(i64 %a, i64 %b) {
+; CHECK-LABEL: add1_mul_val3
+; CHECK-MADD-MSUB: madd x0, x0, x1, x0
+  %1 = add i64 %b, 1
+  %2 = mul i64 %a, %1
+  ret i64 %2
+}
+
+define i32 @sub1_mul_val1(i32 %a, i32 %b) {
+; CHECK-LABEL: sub1_mul_val1
+; CHECK-MADD-MSUB: msub w0, w1, w0, w1
+  %1 = sub i32 1, %a
+  %2 = mul i32 %1, %b
+  ret i32 %2
+}
+
+define i32 @sub1_mul_val2(i32 %a, i32 %b) {
+; CHECK-LABEL: sub1_mul_val2
+; CHECK-MADD-MSUB: msub w0, w0, w1, w0
+  %1 = sub i32 1, %b
+  %2 = mul i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @sub1_mul_val3(i64 %a, i64 %b) {
+; CHECK-LABEL: sub1_mul_val3
+; CHECK-MADD-MSUB: msub x0, x0, x1, x0
+  %1 = sub i64 1, %b
+  %2 = mul i64 %a, %1
+  ret i64 %2
+}
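
For context (not part of the patch): a minimal, hypothetical reproducer that could be fed to llc to observe the new canonicalization, assuming the diff above is applied. The function name and the exact madd operand order are illustrative assumptions only, since register allocation may differ.

; Hypothetical standalone example, assuming the patch above is applied.
; Run with: llc -mtriple=aarch64 -verify-machineinstrs example.ll -o -
; Expected (illustrative): a single "madd w0, w1, w0, w1" instead of an
; "add w8, w0, #1" followed by "mul w0, w8, w1".
define i32 @mul_by_val_plus_one(i32 %x, i32 %y) {
  %add = add i32 %x, 1      ; X + 1
  %mul = mul i32 %add, %y   ; (X + 1) * Y is canonicalized to X*Y + Y
  ret i32 %mul
}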