Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2486,6 +2486,42 @@ return TLO.CombineTo(Op, NewOp); } + // Match a multiply with a disguised negated-power-of-2 and convert it to + // an equivalent shift-left amount (0 if there is no match). + // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) + auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned { + if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse()) + return 0; + ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1)); + if (MulC && !MulC->isOpaque() && !MulC->isZero()) { + APInt UnmaskedC = MulC->getAPIntValue() | HighMask; + if (UnmaskedC.isNegatedPowerOf2()) + return (-UnmaskedC).logBase2(); + } + return 0; + }; + + auto foldMul = [&](SDValue X, SDValue Y, unsigned ShlAmt) { + EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout()); + SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy); + SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC); + SDValue Sub = TLO.DAG.getNode(ISD::SUB, dl, VT, Y, Shl); + return TLO.CombineTo(Op, Sub); + }; + + if (isOperationLegalOrCustom(ISD::SHL, VT)) { + if (Op.getOpcode() == ISD::ADD) { + // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op0)) + return foldMul(Op0.getOperand(0), Op1, ShAmt); + // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op1)) + return foldMul(Op1.getOperand(0), Op0, ShAmt); + // TODO: + // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC)) + } + } + LLVM_FALLTHROUGH; } default: Index: llvm/test/CodeGen/AArch64/mul_pow2.ll =================================================================== --- llvm/test/CodeGen/AArch64/mul_pow2.ll +++ llvm/test/CodeGen/AArch64/mul_pow2.ll @@ -704,8 +704,7 @@ define i32 @muladd_demand(i32 %x, i32 %y) { ; CHECK-LABEL: muladd_demand: ; CHECK: // %bb.0: 
-; CHECK-NEXT: mov w8, #131008 -; CHECK-NEXT: madd w8, w0, w8, w1 +; CHECK-NEXT: sub w8, w1, w0, lsl #6 ; CHECK-NEXT: and w0, w8, #0x1ffc0 ; CHECK-NEXT: ret ; @@ -724,11 +723,10 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: muladd_demand_commute: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #131008 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s -; CHECK-NEXT: movi v0.4s, #1, msl #16 -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: movi v2.4s, #1, msl #16 +; CHECK-NEXT: shl v0.4s, v0.4s, #6 +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret ; ; GISEL-LABEL: muladd_demand_commute: Index: llvm/test/CodeGen/AArch64/srem-lkk.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-lkk.ll +++ llvm/test/CodeGen/AArch64/srem-lkk.ll @@ -124,7 +124,7 @@ ; CHECK-NEXT: add w8, w0, w8 ; CHECK-NEXT: csel w8, w8, w0, lt ; CHECK-NEXT: and w8, w8, #0x80000000 -; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: add w0, w8, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, 2147483648 ret i32 %1 Index: llvm/test/CodeGen/AArch64/srem-seteq.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-seteq.ll +++ llvm/test/CodeGen/AArch64/srem-seteq.ll @@ -256,7 +256,7 @@ ; CHECK-NEXT: add w8, w0, w8 ; CHECK-NEXT: csel w8, w8, w0, lt ; CHECK-NEXT: and w8, w8, #0x80000000 -; CHECK-NEXT: cmn w0, w8 +; CHECK-NEXT: cmn w8, w0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %srem = srem i32 %X, 2147483648 Index: llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll =================================================================== --- llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll +++ llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll @@ -151,45 +151,45 @@ ; MIPSEL-NEXT: lui $1, 12057 ; MIPSEL-NEXT: ori $1, $1, 37186 ; MIPSEL-NEXT: multu $6, $1 -; MIPSEL-NEXT: mflo $2 -; MIPSEL-NEXT: mfhi $3 -; 
MIPSEL-NEXT: lui $7, 52741 -; MIPSEL-NEXT: ori $7, $7, 40665 -; MIPSEL-NEXT: multu $6, $7 -; MIPSEL-NEXT: mflo $8 +; MIPSEL-NEXT: mflo $1 +; MIPSEL-NEXT: mfhi $2 +; MIPSEL-NEXT: lui $3, 52741 +; MIPSEL-NEXT: ori $3, $3, 40665 +; MIPSEL-NEXT: multu $6, $3 +; MIPSEL-NEXT: mflo $7 +; MIPSEL-NEXT: mfhi $8 +; MIPSEL-NEXT: multu $5, $3 ; MIPSEL-NEXT: mfhi $9 -; MIPSEL-NEXT: multu $5, $7 -; MIPSEL-NEXT: mfhi $10 -; MIPSEL-NEXT: mflo $11 -; MIPSEL-NEXT: addu $9, $11, $9 -; MIPSEL-NEXT: addu $12, $2, $9 -; MIPSEL-NEXT: sltu $9, $9, $11 -; MIPSEL-NEXT: sll $11, $12, 31 -; MIPSEL-NEXT: sltu $2, $12, $2 -; MIPSEL-NEXT: srl $13, $8, 1 -; MIPSEL-NEXT: sll $8, $8, 1 -; MIPSEL-NEXT: addu $2, $3, $2 -; MIPSEL-NEXT: or $3, $13, $11 -; MIPSEL-NEXT: srl $11, $12, 1 -; MIPSEL-NEXT: addu $9, $10, $9 -; MIPSEL-NEXT: mul $4, $4, $7 -; MIPSEL-NEXT: mul $1, $5, $1 -; MIPSEL-NEXT: sll $5, $6, 1 +; MIPSEL-NEXT: mflo $10 +; MIPSEL-NEXT: addu $8, $10, $8 +; MIPSEL-NEXT: addu $11, $1, $8 +; MIPSEL-NEXT: sltu $8, $8, $10 +; MIPSEL-NEXT: sll $10, $11, 31 +; MIPSEL-NEXT: sltu $1, $11, $1 +; MIPSEL-NEXT: srl $12, $7, 1 +; MIPSEL-NEXT: sll $7, $7, 1 +; MIPSEL-NEXT: addu $1, $2, $1 +; MIPSEL-NEXT: or $10, $12, $10 +; MIPSEL-NEXT: srl $2, $11, 1 +; MIPSEL-NEXT: addu $8, $9, $8 +; MIPSEL-NEXT: mul $3, $4, $3 +; MIPSEL-NEXT: sll $4, $6, 1 +; MIPSEL-NEXT: sll $5, $5, 1 ; MIPSEL-NEXT: lui $6, 60010 ; MIPSEL-NEXT: ori $6, $6, 61135 -; MIPSEL-NEXT: addu $2, $9, $2 -; MIPSEL-NEXT: addu $1, $1, $2 -; MIPSEL-NEXT: addu $2, $5, $4 -; MIPSEL-NEXT: addu $1, $1, $2 +; MIPSEL-NEXT: addu $1, $8, $1 +; MIPSEL-NEXT: subu $1, $1, $5 +; MIPSEL-NEXT: addu $3, $4, $3 +; MIPSEL-NEXT: addu $1, $1, $3 ; MIPSEL-NEXT: andi $1, $1, 3 -; MIPSEL-NEXT: sll $2, $1, 31 -; MIPSEL-NEXT: or $4, $11, $2 -; MIPSEL-NEXT: sltiu $2, $4, 13 -; MIPSEL-NEXT: xori $4, $4, 13 -; MIPSEL-NEXT: sltu $3, $3, $6 -; MIPSEL-NEXT: movz $2, $3, $4 +; MIPSEL-NEXT: sll $3, $1, 31 +; MIPSEL-NEXT: or $3, $2, $3 +; MIPSEL-NEXT: sltiu $2, $3, 13 +; MIPSEL-NEXT: 
xori $3, $3, 13 +; MIPSEL-NEXT: sltu $4, $10, $6 +; MIPSEL-NEXT: movz $2, $4, $3 ; MIPSEL-NEXT: srl $1, $1, 1 -; MIPSEL-NEXT: or $1, $1, $8 +; MIPSEL-NEXT: or $1, $1, $7 ; MIPSEL-NEXT: andi $1, $1, 3 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: movn $2, $zero, $1 @@ -212,7 +212,7 @@ ; MIPS64EL-NEXT: dsll $5, $5, 16 ; MIPS64EL-NEXT: daddiu $5, $5, -4401 ; MIPS64EL-NEXT: dsll $4, $4, 1 -; MIPS64EL-NEXT: daddu $3, $3, $4 +; MIPS64EL-NEXT: dsubu $3, $3, $4 ; MIPS64EL-NEXT: daddu $2, $3, $2 ; MIPS64EL-NEXT: andi $3, $2, 3 ; MIPS64EL-NEXT: dsll $2, $3, 63 Index: llvm/test/CodeGen/PowerPC/srem-lkk.ll =================================================================== --- llvm/test/CodeGen/PowerPC/srem-lkk.ll +++ llvm/test/CodeGen/PowerPC/srem-lkk.ll @@ -122,7 +122,7 @@ ; CHECK-NEXT: srawi 4, 3, 31 ; CHECK-NEXT: addze 4, 4 ; CHECK-NEXT: slwi 4, 4, 31 -; CHECK-NEXT: add 3, 3, 4 +; CHECK-NEXT: add 3, 4, 3 ; CHECK-NEXT: blr %1 = srem i32 %x, 2147483648 ret i32 %1 Index: llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll =================================================================== --- llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll +++ llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll @@ -258,7 +258,7 @@ ; PPC64LE-NEXT: mulld 4, 4, 5 ; PPC64LE-NEXT: mulld 5, 3, 5 ; PPC64LE-NEXT: sldi 3, 3, 1 -; PPC64LE-NEXT: add 3, 6, 3 +; PPC64LE-NEXT: sub 3, 6, 3 ; PPC64LE-NEXT: add 3, 3, 4 ; PPC64LE-NEXT: lis 4, -8538 ; PPC64LE-NEXT: rotldi 6, 5, 63 Index: llvm/test/CodeGen/RISCV/mul.ll =================================================================== --- llvm/test/CodeGen/RISCV/mul.ll +++ llvm/test/CodeGen/RISCV/mul.ll @@ -1550,47 +1550,29 @@ define i8 @muladd_demand(i8 %x, i8 %y) nounwind { ; RV32I-LABEL: muladd_demand: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: li a1, 14 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: add a0, 
s0, a0 +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: andi a0, a0, 15 -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muladd_demand: ; RV32IM: # %bb.0: -; RV32IM-NEXT: li a2, 14 -; RV32IM-NEXT: mul a0, a0, a2 -; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: slli a0, a0, 1 +; RV32IM-NEXT: sub a0, a1, a0 ; RV32IM-NEXT: andi a0, a0, 15 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muladd_demand: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: li a1, 14 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: addw a0, s0, a0 +; RV64I-NEXT: slliw a0, a0, 1 +; RV64I-NEXT: subw a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 15 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muladd_demand: ; RV64IM: # %bb.0: -; RV64IM-NEXT: li a2, 14 -; RV64IM-NEXT: mulw a0, a0, a2 -; RV64IM-NEXT: addw a0, a1, a0 +; RV64IM-NEXT: slliw a0, a0, 1 +; RV64IM-NEXT: subw a0, a1, a0 ; RV64IM-NEXT: andi a0, a0, 15 ; RV64IM-NEXT: ret %m = mul i8 %x, 14 Index: llvm/test/CodeGen/RISCV/srem-lkk.ll =================================================================== --- llvm/test/CodeGen/RISCV/srem-lkk.ll +++ llvm/test/CodeGen/RISCV/srem-lkk.ll @@ -367,7 +367,7 @@ ; RV32I-NEXT: add a1, a0, a1 ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: dont_fold_srem_i32_smax: @@ -377,7 +377,7 @@ ; RV32IM-NEXT: add a1, a0, a1 ; RV32IM-NEXT: lui a2, 524288 ; RV32IM-NEXT: and a1, a1, a2 -; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: add a0, a1, a0 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: dont_fold_srem_i32_smax: Index: 
llvm/test/CodeGen/X86/mul-demand.ll =================================================================== --- llvm/test/CodeGen/X86/mul-demand.ll +++ llvm/test/CodeGen/X86/mul-demand.ll @@ -4,8 +4,9 @@ define i64 @muladd_demand(i64 %x, i64 %y) { ; CHECK-LABEL: muladd_demand: ; CHECK: # %bb.0: -; CHECK-NEXT: imull $131008, %edi, %eax # imm = 0x1FFC0 -; CHECK-NEXT: addl %esi, %eax +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shll $6, %edi +; CHECK-NEXT: subl %edi, %eax ; CHECK-NEXT: shlq $47, %rax ; CHECK-NEXT: retq %m = mul i64 %x, 131008 ; 0x0001ffc0 @@ -17,9 +18,10 @@ define <2 x i64> @muladd_demand_commute(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: muladd_demand_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; CHECK-NEXT: paddq %xmm1, %xmm0 -; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: psllq $6, %xmm0 +; CHECK-NEXT: psubq %xmm0, %xmm1 +; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-NEXT: movdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %m = mul <2 x i64> %x, %a = add <2 x i64> %y, %m