diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2533,24 +2533,27 @@
       return 0;
     };
 
-    auto foldMul = [&](SDValue X, SDValue Y, unsigned ShlAmt) {
+    auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
       EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
       SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
       SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
-      SDValue Sub = TLO.DAG.getNode(ISD::SUB, dl, VT, Y, Shl);
-      return TLO.CombineTo(Op, Sub);
+      SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
+      return TLO.CombineTo(Op, Res);
     };
 
     if (isOperationLegalOrCustom(ISD::SHL, VT)) {
       if (Op.getOpcode() == ISD::ADD) {
         // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
         if (unsigned ShAmt = getShiftLeftAmt(Op0))
-          return foldMul(Op0.getOperand(0), Op1, ShAmt);
+          return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
         // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
         if (unsigned ShAmt = getShiftLeftAmt(Op1))
-          return foldMul(Op1.getOperand(0), Op0, ShAmt);
-        // TODO:
+          return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
+      }
+      if (Op.getOpcode() == ISD::SUB) {
         // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
+        if (unsigned ShAmt = getShiftLeftAmt(Op1))
+          return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
       }
     }
 
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1584,47 +1584,29 @@
 define i8 @mulsub_demand(i8 %x, i8 %y) nounwind {
 ; RV32I-LABEL: mulsub_demand:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi sp, sp, -16
-; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    li a1, 14
-; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    sub a0, s0, a0
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    andi a0, a0, 15
-; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: mulsub_demand:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    li a2, 14
-; RV32IM-NEXT:    mul a0, a0, a2
-; RV32IM-NEXT:    sub a0, a1, a0
+; RV32IM-NEXT:    slli a0, a0, 1
+; RV32IM-NEXT:    add a0, a1, a0
 ; RV32IM-NEXT:    andi a0, a0, 15
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: mulsub_demand:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -16
-; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s0, a1
-; RV64I-NEXT:    li a1, 14
-; RV64I-NEXT:    call __muldi3@plt
-; RV64I-NEXT:    subw a0, s0, a0
+; RV64I-NEXT:    slliw a0, a0, 1
+; RV64I-NEXT:    addw a0, a1, a0
 ; RV64I-NEXT:    andi a0, a0, 15
-; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: mulsub_demand:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    li a2, 14
-; RV64IM-NEXT:    mulw a0, a0, a2
-; RV64IM-NEXT:    subw a0, a1, a0
+; RV64IM-NEXT:    slliw a0, a0, 1
+; RV64IM-NEXT:    addw a0, a1, a0
 ; RV64IM-NEXT:    andi a0, a0, 15
 ; RV64IM-NEXT:    ret
   %m = mul i8 %x, 14
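Note (not part of the patch): the new ISD::SUB arm fires when, restricted to the demanded bits, the multiplier acts as a negated power of two (the log2(-MulC) in the comments). In mulsub_demand only the low 4 bits are demanded, and 14 == -2 (mod 16), so x * 14 == -(x << 1) and y - x * 14 == y + (x << 1) in those bits, which is exactly the slli/add sequence in the updated CHECK lines. A minimal illustrative sketch in LLVM IR, with a hypothetical function name that is not taken from the test suite:

; Only the low 4 bits of %s are demanded (the 'and' with 15), so the
; multiplier 14 behaves as -2 mod 16 and the mul+sub can fold to shl+add.
define i8 @mulsub_demand_sketch(i8 %x, i8 %y) {
  %m = mul i8 %x, 14      ; x * 14 == -(x << 1) (mod 16)
  %s = sub i8 %y, %m      ; y - x*14 == y + (x << 1) (mod 16)
  %r = and i8 %s, 15      ; only the low 4 bits are demanded
  ret i8 %r
}

With the patch applied, llc for riscv64 with the M extension is expected to lower this body to slliw/addw/andi rather than a mulw/subw sequence, matching the RV64IM CHECK lines above.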