Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2080,6 +2080,18 @@
     return false;
   }
 
+  /// Return true if it may be profitable to fold
+  /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2), or return false to
+  /// prevent the fold when it would be a definite regression.
+  /// The target should check the cost of materializing c1, c2, and c1*c2
+  /// into registers. If it is unsure about some cases, it can return the
+  /// default of true and let the DAGCombiner decide.
+  /// AddNode is (add x, c1), and ConstNode is c2.
+  virtual bool isMulAddWithConstProfitable(const SDValue &AddNode,
+                                           const SDValue &ConstNode) const {
+    return true;
+  }
+
   /// Return true if it is more correct/profitable to use strict FP_TO_INT
   /// conversion operations - canonicalizing the FP source value instead of
   /// converting all cases and then selecting based on value.
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16854,8 +16854,10 @@
                                         SDValue &ConstNode) {
   APInt Val;
 
-  // If the add only has one use, this would be OK to do.
-  if (AddNode.getNode()->hasOneUse())
+  // If the add only has one use and the target finds no regression, this
+  // would be OK to do.
+  if (AddNode.getNode()->hasOneUse() &&
+      TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
     return true;
 
   // Walk all the users of the constant with which we're multiplying.
Index: llvm/lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -461,6 +461,15 @@
   bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                               SDValue C) const override;
 
+  /// Return true if it may be profitable to fold
+  /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2), or return false to
+  /// prevent the fold when it would be a definite regression.
+  /// The target should check the cost of materializing c1, c2, and c1*c2
+  /// into registers. If it is unsure about some cases, it can return the
+  /// default of true and let the DAGCombiner decide.
+  bool isMulAddWithConstProfitable(const SDValue &AddNode,
+                                   const SDValue &ConstNode) const override;
+
   TargetLowering::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
   Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9077,6 +9077,29 @@
   return false;
 }
 
+bool RISCVTargetLowering::isMulAddWithConstProfitable(
+    const SDValue &AddNode, const SDValue &ConstNode) const {
+  // Let the DAGCombiner decide for vectors.
+  EVT VT = AddNode.getValueType();
+  if (VT.isVector())
+    return true;
+
+  // Let the DAGCombiner decide for larger types.
+  if (Subtarget.getXLen() < VT.getScalarSizeInBits())
+    return true;
+
+  // It is not profitable if c1 fits in simm12 while c1*c2 does not.
+  ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
+  ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
+  const APInt &C1 = C1Node->getAPIntValue();
+  const APInt &C2 = C2Node->getAPIntValue();
+  if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
+    return false;
+
+  // Default to true and let the DAGCombiner decide.
+  return true;
+}
+
 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
     bool *Fast) const {
Index: llvm/test/CodeGen/RISCV/addimm-mulimm.ll
===================================================================
--- llvm/test/CodeGen/RISCV/addimm-mulimm.ll
+++ llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -146,20 +146,16 @@
 define i32 @add_mul_combine_reject_a1(i32 %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_a1:
 ; RV32IMB:       # %bb.0:
+; RV32IMB-NEXT:    addi a0, a0, 1971
 ; RV32IMB-NEXT:    addi a1, zero, 29
 ; RV32IMB-NEXT:    mul a0, a0, a1
-; RV32IMB-NEXT:    lui a1, 14
-; RV32IMB-NEXT:    addi a1, a1, -185
-; RV32IMB-NEXT:    add a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_a1:
 ; RV64IMB:       # %bb.0:
+; RV64IMB-NEXT:    addi a0, a0, 1971
 ; RV64IMB-NEXT:    addi a1, zero, 29
 ; RV64IMB-NEXT:    mul a0, a0, a1
-; RV64IMB-NEXT:    lui a1, 14
-; RV64IMB-NEXT:    addiw a1, a1, -185
-; RV64IMB-NEXT:    add a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 1971
   %tmp1 = mul i32 %tmp0, 29
@@ -169,20 +165,16 @@
 define signext i32 @add_mul_combine_reject_a2(i32 signext %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_a2:
 ; RV32IMB:       # %bb.0:
+; RV32IMB-NEXT:    addi a0, a0, 1971
 ; RV32IMB-NEXT:    addi a1, zero, 29
 ; RV32IMB-NEXT:    mul a0, a0, a1
-; RV32IMB-NEXT:    lui a1, 14
-; RV32IMB-NEXT:    addi a1, a1, -185
-; RV32IMB-NEXT:    add a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_a2:
 ; RV64IMB:       # %bb.0:
+; RV64IMB-NEXT:    addi a0, a0, 1971
 ; RV64IMB-NEXT:    addi a1, zero, 29
-; RV64IMB-NEXT:    mul a0, a0, a1
-; RV64IMB-NEXT:    lui a1, 14
-; RV64IMB-NEXT:    addiw a1, a1, -185
-; RV64IMB-NEXT:    addw a0, a0, a1
+; RV64IMB-NEXT:    mulw a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 1971
   %tmp1 = mul i32 %tmp0, 29
@@ -206,11 +198,9 @@
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_a3:
 ; RV64IMB:       # %bb.0:
+; RV64IMB-NEXT:    addi a0, a0, 1971
 ; RV64IMB-NEXT:    addi a1, zero, 29
 ; RV64IMB-NEXT:    mul a0, a0, a1
-; RV64IMB-NEXT:    lui a1, 14
-; RV64IMB-NEXT:    addiw a1, a1, -185
-; RV64IMB-NEXT:    add a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i64 %x, 1971
   %tmp1 = mul i64 %tmp0, 29
@@ -220,20 +210,16 @@
 define i32 @add_mul_combine_reject_c1(i32 %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_c1:
 ; RV32IMB:       # %bb.0:
+; RV32IMB-NEXT:    addi a0, a0, 1000
 ; RV32IMB-NEXT:    sh3add a1, a0, a0
 ; RV32IMB-NEXT:    sh3add a0, a1, a0
-; RV32IMB-NEXT:    lui a1, 18
-; RV32IMB-NEXT:    addi a1, a1, -728
-; RV32IMB-NEXT:    add a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_c1:
 ; RV64IMB:       # %bb.0:
+; RV64IMB-NEXT:    addi a0, a0, 1000
 ; RV64IMB-NEXT:    sh3add a1, a0, a0
 ; RV64IMB-NEXT:    sh3add a0, a1, a0
-; RV64IMB-NEXT:    lui a1, 18
-; RV64IMB-NEXT:    addiw a1, a1, -728
-; RV64IMB-NEXT:    add a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 1000
   %tmp1 = mul i32 %tmp0, 73
@@ -243,20 +229,16 @@
 define signext i32 @add_mul_combine_reject_c2(i32 signext %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_c2:
 ; RV32IMB:       # %bb.0:
+; RV32IMB-NEXT:    addi a0, a0, 1000
 ; RV32IMB-NEXT:    sh3add a1, a0, a0
 ; RV32IMB-NEXT:    sh3add a0, a1, a0
-; RV32IMB-NEXT:    lui a1, 18
-; RV32IMB-NEXT:    addi a1, a1, -728
-; RV32IMB-NEXT:    add a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_c2:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    sh3add a1, a0, a0
-; RV64IMB-NEXT:    sh3add a0, a1, a0
-; RV64IMB-NEXT:    lui a1, 18
-; RV64IMB-NEXT:    addiw a1, a1, -728
-; RV64IMB-NEXT:    addw a0, a0, a1
+; RV64IMB-NEXT:    addi a0, a0, 1000
+; RV64IMB-NEXT:    addi a1, zero, 73
+; RV64IMB-NEXT:    mulw a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 1000
   %tmp1 = mul i32 %tmp0, 73
@@ -280,11 +262,9 @@
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_c3:
 ; RV64IMB:       # %bb.0:
+; RV64IMB-NEXT:    addi a0, a0, 1000
 ; RV64IMB-NEXT:    sh3add a1, a0, a0
 ; RV64IMB-NEXT:    sh3add a0, a1, a0
-; RV64IMB-NEXT:    lui a1, 18
-; RV64IMB-NEXT:    addiw a1, a1, -728
-; RV64IMB-NEXT:    add a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i64 %x, 1000
   %tmp1 = mul i64 %tmp0, 73
@@ -294,20 +274,16 @@
 define i32 @add_mul_combine_reject_d1(i32 %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_d1:
 ; RV32IMB:       # %bb.0:
+; RV32IMB-NEXT:    addi a0, a0, 1000
 ; RV32IMB-NEXT:    sh1add a0, a0, a0
 ; RV32IMB-NEXT:    slli a0, a0, 6
-; RV32IMB-NEXT:    lui a1, 47
-; RV32IMB-NEXT:    addi a1, a1, -512
-; RV32IMB-NEXT:    add a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_d1:
 ; RV64IMB:       # %bb.0:
+; RV64IMB-NEXT:    addi a0, a0, 1000
 ; RV64IMB-NEXT:    sh1add a0, a0, a0
 ; RV64IMB-NEXT:    slli a0, a0, 6
-; RV64IMB-NEXT:    lui a1, 47
-; RV64IMB-NEXT:    addiw a1, a1, -512
-; RV64IMB-NEXT:    add a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 1000
   %tmp1 = mul i32 %tmp0, 192
@@ -317,20 +293,16 @@
 define signext i32 @add_mul_combine_reject_d2(i32 signext %x) {
 ; RV32IMB-LABEL: add_mul_combine_reject_d2:
 ; RV32IMB:       # %bb.0:
+; RV32IMB-NEXT:    addi a0, a0, 1000
 ; RV32IMB-NEXT:    sh1add a0, a0, a0
 ; RV32IMB-NEXT:    slli a0, a0, 6
-; RV32IMB-NEXT:    lui a1, 47
-; RV32IMB-NEXT:    addi a1, a1, -512
-; RV32IMB-NEXT:    add a0, a0, a1
 ; RV32IMB-NEXT:    ret
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_d2:
 ; RV64IMB:       # %bb.0:
-; RV64IMB-NEXT:    sh1add a0, a0, a0
-; RV64IMB-NEXT:    slli a0, a0, 6
-; RV64IMB-NEXT:    lui a1, 47
-; RV64IMB-NEXT:    addiw a1, a1, -512
-; RV64IMB-NEXT:    addw a0, a0, a1
+; RV64IMB-NEXT:    addi a0, a0, 1000
+; RV64IMB-NEXT:    addi a1, zero, 192
+; RV64IMB-NEXT:    mulw a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i32 %x, 1000
   %tmp1 = mul i32 %tmp0, 192
@@ -356,11 +328,9 @@
 ;
 ; RV64IMB-LABEL: add_mul_combine_reject_d3:
 ; RV64IMB:       # %bb.0:
+; RV64IMB-NEXT:    addi a0, a0, 1000
 ; RV64IMB-NEXT:    sh1add a0, a0, a0
 ; RV64IMB-NEXT:    slli a0, a0, 6
-; RV64IMB-NEXT:    lui a1, 47
-; RV64IMB-NEXT:    addiw a1, a1, -512
-; RV64IMB-NEXT:    add a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = add i64 %x, 1000
   %tmp1 = mul i64 %tmp0, 192
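
Worked example of the heuristic (a standalone editorial sketch, not part of the patch): in add_mul_combine_reject_a1 above, c1 = 1971 fits in a 12-bit signed immediate (simm12 range is -2048..2047) but c1 * c2 = 1971 * 29 = 57159 does not, so RISCVTargetLowering::isMulAddWithConstProfitable returns false, the DAGCombiner keeps (mul (add x, 1971), 29), and the output becomes addi + mul instead of mul + lui + addi(w) + add. The sketch below re-implements the same check with plain integers; the helper isSignedIntN and main() are assumptions made for illustration, not LLVM APIs.

// simm12_check.cpp - standalone sketch of the simm12 profitability check.
#include <cstdint>
#include <cstdio>

// True if V fits in an N-bit signed immediate, mirroring APInt::isSignedIntN.
static bool isSignedIntN(int64_t V, unsigned N) {
  int64_t Lo = -(int64_t(1) << (N - 1));    // -2048 for N = 12
  int64_t Hi = (int64_t(1) << (N - 1)) - 1; //  2047 for N = 12
  return V >= Lo && V <= Hi;
}

int main() {
  int64_t C1 = 1971, C2 = 29;
  // Reject the fold when c1 is simm12 but c1*c2 (57159 here) is not:
  // folding would force an extra lui/addi(w) pair to materialize c1*c2.
  bool Profitable = !(isSignedIntN(C1, 12) && !isSignedIntN(C1 * C2, 12));
  std::printf("c1=%lld c2=%lld c1*c2=%lld -> fold is %s\n",
              (long long)C1, (long long)C2, (long long)(C1 * C2),
              Profitable ? "allowed" : "rejected"); // prints "rejected"
  return 0;
}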