diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -123,6 +123,10 @@
 
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
+                                    const APInt &DemandedElts,
+                                    TargetLoweringOpt &TLO) const override;
+
   unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                            const APInt &DemandedElts,
                                            const SelectionDAG &DAG,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1867,6 +1867,57 @@
   return true;
 }
 
+bool
+RISCVTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+                                                  const APInt &DemandedBits,
+                                                  const APInt &DemandedElts,
+                                                  TargetLoweringOpt &TLO) const {
+  // Delay this optimization as late as possible.
+  if (!TLO.LegalOps)
+    return false;
+
+  EVT VT = Op.getValueType();
+  if (VT.isVector())
+    return false;
+
+  // Only handle AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+
+  const APInt &Mask = C->getAPIntValue();
+
+  APInt ShrunkMask = Mask & DemandedBits;
+
+  // If the shrunk mask fits in sign extended 12 bits, let the target
+  // independent code apply it.
+  if (ShrunkMask.isSignedIntN(12))
+    return false;
+
+  // See if we can set the MSBs of the mask to make it sign extended.
+  APInt NewMask = ShrunkMask;
+  NewMask.setBitsFrom(11);
+
+  // If we aren't changing the mask, just return true to keep it and prevent
+  // the caller from optimizing.
+  if (NewMask == Mask)
+    return true;
+
+  // Make sure the new mask can be represented by a combination of mask bits
+  // and non-demanded bits.
+  if (!NewMask.isSubsetOf(Mask | ~DemandedBits))
+    return false;
+
+  // Replace the constant with the new mask.
+  SDLoc DL(Op);
+  SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+  return TLO.CombineTo(Op, NewOp);
+}
+
 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
     unsigned Depth) const {
diff --git a/llvm/test/CodeGen/RISCV/frame-info.ll b/llvm/test/CodeGen/RISCV/frame-info.ll
--- a/llvm/test/CodeGen/RISCV/frame-info.ll
+++ b/llvm/test/CodeGen/RISCV/frame-info.ll
@@ -82,10 +82,7 @@
 ; RV64-NEXT:    slli a0, a0, 32
 ; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    addi a0, a0, 15
-; RV64-NEXT:    addi a1, zero, 1
-; RV64-NEXT:    slli a1, a1, 33
-; RV64-NEXT:    addi a1, a1, -16
-; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    andi a0, a0, -16
 ; RV64-NEXT:    sub a0, sp, a0
 ; RV64-NEXT:    mv sp, a0
 ; RV64-NEXT:    call callee_with_args@plt
@@ -129,10 +126,7 @@
 ; RV64-WITHFP-NEXT:    slli a0, a0, 32
 ; RV64-WITHFP-NEXT:    srli a0, a0, 32
 ; RV64-WITHFP-NEXT:    addi a0, a0, 15
-; RV64-WITHFP-NEXT:    addi a1, zero, 1
-; RV64-WITHFP-NEXT:    slli a1, a1, 33
-; RV64-WITHFP-NEXT:    addi a1, a1, -16
-; RV64-WITHFP-NEXT:    and a0, a0, a1
+; RV64-WITHFP-NEXT:    andi a0, a0, -16
 ; RV64-WITHFP-NEXT:    sub a0, sp, a0
 ; RV64-WITHFP-NEXT:    mv sp, a0
 ; RV64-WITHFP-NEXT:    call callee_with_args@plt
diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -360,10 +360,7 @@
 ; RV64I-NEXT:    srli a1, a1, 57
 ; RV64I-NEXT:    andi a1, a1, 63
 ; RV64I-NEXT:    add a1, a0, a1
-; RV64I-NEXT:    addi a2, zero, 1
-; RV64I-NEXT:    slli a2, a2, 32
-; RV64I-NEXT:    addi a2, a2, -64
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    andi a1, a1, -64
 ; RV64I-NEXT:    subw a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
@@ -373,10 +370,7 @@
 ; RV64IM-NEXT:    srli a1, a1, 57
 ; RV64IM-NEXT:    andi a1, a1, 63
 ; RV64IM-NEXT:    add a1, a0, a1
-; RV64IM-NEXT:    addi a2, zero, 1
-; RV64IM-NEXT:    slli a2, a2, 32
-; RV64IM-NEXT:    addi a2, a2, -64
-; RV64IM-NEXT:    and a1, a1, a2
+; RV64IM-NEXT:    andi a1, a1, -64
 ; RV64IM-NEXT:    subw a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = srem i32 %x, 64
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -677,19 +677,15 @@
 ; RV32I-NEXT:    lh a1, 4(a1)
 ; RV32I-NEXT:    srli a4, a2, 26
 ; RV32I-NEXT:    add a4, a2, a4
-; RV32I-NEXT:    lui a6, 16
-; RV32I-NEXT:    addi a5, a6, -64
-; RV32I-NEXT:    and a4, a4, a5
+; RV32I-NEXT:    andi a4, a4, -64
 ; RV32I-NEXT:    sub s2, a2, a4
 ; RV32I-NEXT:    srli a2, a1, 27
 ; RV32I-NEXT:    add a2, a1, a2
-; RV32I-NEXT:    addi a4, a6, -32
-; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    andi a2, a2, -32
 ; RV32I-NEXT:    sub s3, a1, a2
 ; RV32I-NEXT:    srli a1, a3, 29
 ; RV32I-NEXT:    add a1, a3, a1
-; RV32I-NEXT:    addi a2, a6, -8
-; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    andi a1, a1, -8
 ; RV32I-NEXT:    sub s1, a3, a1
 ; RV32I-NEXT:    addi a1, zero, 95
 ; RV32I-NEXT:    call __modsi3@plt
@@ -707,7 +703,7 @@
 ;
 ; RV32IM-LABEL: dont_fold_srem_power_of_two:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lh a6, 8(a1)
+; RV32IM-NEXT:    lh a2, 8(a1)
 ; RV32IM-NEXT:    lh a3, 4(a1)
 ; RV32IM-NEXT:    lh a4, 12(a1)
 ; RV32IM-NEXT:    lh a1, 0(a1)
@@ -715,32 +711,28 @@
 ; RV32IM-NEXT:    addi a5, a5, 389
 ; RV32IM-NEXT:    mulh a5, a4, a5
 ; RV32IM-NEXT:    add a5, a5, a4
-; RV32IM-NEXT:    srli a2, a5, 31
+; RV32IM-NEXT:    srli a6, a5, 31
 ; RV32IM-NEXT:    srli a5, a5, 6
-; RV32IM-NEXT:    add a2, a5, a2
+; RV32IM-NEXT:    add a6, a5, a6
 ; RV32IM-NEXT:    addi a5, zero, 95
-; RV32IM-NEXT:    mul a2, a2, a5
-; RV32IM-NEXT:    sub a7, a4, a2
-; RV32IM-NEXT:    srli a4, a1, 26
-; RV32IM-NEXT:    add a4, a1, a4
-; RV32IM-NEXT:    lui a5, 16
-; RV32IM-NEXT:    addi a2, a5, -64
-; RV32IM-NEXT:    and a2, a4, a2
-; RV32IM-NEXT:    sub a1, a1, a2
-; RV32IM-NEXT:    srli a2, a3, 27
-; RV32IM-NEXT:    add a2, a3, a2
-; RV32IM-NEXT:    addi a4, a5, -32
-; RV32IM-NEXT:    and a2, a2, a4
-; RV32IM-NEXT:    sub a2, a3, a2
-; RV32IM-NEXT:    srli a3, a6, 29
-; RV32IM-NEXT:    add a3, a6, a3
-; RV32IM-NEXT:    addi a4, a5, -8
-; RV32IM-NEXT:    and a3, a3, a4
-; RV32IM-NEXT:    sub a3, a6, a3
-; RV32IM-NEXT:    sh a3, 4(a0)
-; RV32IM-NEXT:    sh a2, 2(a0)
+; RV32IM-NEXT:    mul a5, a6, a5
+; RV32IM-NEXT:    sub a4, a4, a5
+; RV32IM-NEXT:    srli a5, a1, 26
+; RV32IM-NEXT:    add a5, a1, a5
+; RV32IM-NEXT:    andi a5, a5, -64
+; RV32IM-NEXT:    sub a1, a1, a5
+; RV32IM-NEXT:    srli a5, a3, 27
+; RV32IM-NEXT:    add a5, a3, a5
+; RV32IM-NEXT:    andi a5, a5, -32
+; RV32IM-NEXT:    sub a3, a3, a5
+; RV32IM-NEXT:    srli a5, a2, 29
+; RV32IM-NEXT:    add a5, a2, a5
+; RV32IM-NEXT:    andi a5, a5, -8
+; RV32IM-NEXT:    sub a2, a2, a5
+; RV32IM-NEXT:    sh a2, 4(a0)
+; RV32IM-NEXT:    sh a3, 2(a0)
 ; RV32IM-NEXT:    sh a1, 0(a0)
-; RV32IM-NEXT:    sh a7, 6(a0)
+; RV32IM-NEXT:    sh a4, 6(a0)
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: dont_fold_srem_power_of_two:
@@ -758,19 +750,15 @@
 ; RV64I-NEXT:    lh a1, 8(a1)
 ; RV64I-NEXT:    srli a4, a2, 58
 ; RV64I-NEXT:    add a4, a2, a4
-; RV64I-NEXT:    lui a6, 16
-; RV64I-NEXT:    addiw a5, a6, -64
-; RV64I-NEXT:    and a4, a4, a5
+; RV64I-NEXT:    andi a4, a4, -64
 ; RV64I-NEXT:    sub s2, a2, a4
 ; RV64I-NEXT:    srli a2, a1, 59
 ; RV64I-NEXT:    add a2, a1, a2
-; RV64I-NEXT:    addiw a4, a6, -32
-; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    andi a2, a2, -32
 ; RV64I-NEXT:    sub s3, a1, a2
 ; RV64I-NEXT:    srli a1, a3, 61
 ; RV64I-NEXT:    add a1, a3, a1
-; RV64I-NEXT:    addiw a2, a6, -8
-; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    andi a1, a1, -8
 ; RV64I-NEXT:    sub s1, a3, a1
 ; RV64I-NEXT:    addi a1, zero, 95
 ; RV64I-NEXT:    call __moddi3@plt
@@ -807,27 +795,23 @@
 ; RV64IM-NEXT:    add a2, a5, a2
 ; RV64IM-NEXT:    addi a5, zero, 95
 ; RV64IM-NEXT:    mul a2, a2, a5
-; RV64IM-NEXT:    sub a7, a1, a2
+; RV64IM-NEXT:    sub a1, a1, a2
 ; RV64IM-NEXT:    srli a2, a4, 58
 ; RV64IM-NEXT:    add a2, a4, a2
-; RV64IM-NEXT:    lui a5, 16
-; RV64IM-NEXT:    addiw a1, a5, -64
-; RV64IM-NEXT:    and a1, a2, a1
-; RV64IM-NEXT:    sub a1, a4, a1
-; RV64IM-NEXT:    srli a2, a3, 59
-; RV64IM-NEXT:    add a2, a3, a2
-; RV64IM-NEXT:    addiw a4, a5, -32
-; RV64IM-NEXT:    and a2, a2, a4
-; RV64IM-NEXT:    sub a2, a3, a2
-; RV64IM-NEXT:    srli a3, a6, 61
-; RV64IM-NEXT:    add a3, a6, a3
-; RV64IM-NEXT:    addiw a4, a5, -8
-; RV64IM-NEXT:    and a3, a3, a4
-; RV64IM-NEXT:    sub a3, a6, a3
-; RV64IM-NEXT:    sh a3, 4(a0)
-; RV64IM-NEXT:    sh a2, 2(a0)
-; RV64IM-NEXT:    sh a1, 0(a0)
-; RV64IM-NEXT:    sh a7, 6(a0)
+; RV64IM-NEXT:    andi a2, a2, -64
+; RV64IM-NEXT:    sub a2, a4, a2
+; RV64IM-NEXT:    srli a4, a3, 59
+; RV64IM-NEXT:    add a4, a3, a4
+; RV64IM-NEXT:    andi a4, a4, -32
+; RV64IM-NEXT:    sub a3, a3, a4
+; RV64IM-NEXT:    srli a4, a6, 61
+; RV64IM-NEXT:    add a4, a6, a4
+; RV64IM-NEXT:    andi a4, a4, -8
+; RV64IM-NEXT:    sub a4, a6, a4
+; RV64IM-NEXT:    sh a4, 4(a0)
+; RV64IM-NEXT:    sh a3, 2(a0)
+; RV64IM-NEXT:    sh a2, 0(a0)
+; RV64IM-NEXT:    sh a1, 6(a0)
 ; RV64IM-NEXT:    ret
   %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
   ret <4 x i16> %1
diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
--- a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
+++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
@@ -51,10 +51,7 @@
 ; RV64I-NEXT:    slli a0, a0, 32
 ; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    addi a0, a0, 15
-; RV64I-NEXT:    addi a1, zero, 1
-; RV64I-NEXT:    slli a1, a1, 33
-; RV64I-NEXT:    addi a1, a1, -16
-; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub a0, sp, a0
 ; RV64I-NEXT:    mv sp, a0
 ; RV64I-NEXT:    mv a1, s1
diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -361,10 +361,7 @@
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    slli a0, a1, 32
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    srli a0, a0, 32
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a0, a0, 15
-; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a1, zero, 1
-; LP64-LP64F-LP64D-FPELIM-NEXT:    slli a1, a1, 33
-; LP64-LP64F-LP64D-FPELIM-NEXT:    addi a1, a1, -16
-; LP64-LP64F-LP64D-FPELIM-NEXT:    and a0, a0, a1
+; LP64-LP64F-LP64D-FPELIM-NEXT:    andi a0, a0, -16
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    sub a0, sp, a0
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    mv sp, a0
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    call notdead@plt
@@ -396,10 +393,7 @@
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a0, a1, 32
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    srli a0, a0, 32
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a0, a0, 15
-; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a1, zero, 1
-; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a1, a1, 33
-; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a1, a1, -16
-; LP64-LP64F-LP64D-WITHFP-NEXT:    and a0, a0, a1
+; LP64-LP64F-LP64D-WITHFP-NEXT:    andi a0, a0, -16
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sub a0, sp, a0
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    mv sp, a0
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    call notdead@plt
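
Not part of the patch itself: the following is a minimal standalone sketch of the constant-shrinking rule the new hook applies, written against plain 64-bit integers instead of APInt. The helper name shrinkAndMask and the example values are invented for illustration; the values mirror the frame-info.ll case above, where the mask (1 << 33) - 16 is only demanded in its low 33 bits and can therefore be widened to -16, which fits ANDI's signed 12-bit immediate.

#include <cstdint>
#include <cstdio>

// Sketch of the rule: rewrite (x & Mask) as (x & NewMask) where NewMask is
// sign-extended from 12 bits (encodable in RISC-V ANDI). Bits outside
// DemandedBits may take any value in the replacement mask, which is what
// lets us set all of the upper bits.
static bool shrinkAndMask(uint64_t Mask, uint64_t DemandedBits,
                          uint64_t &NewMask) {
  uint64_t Shrunk = Mask & DemandedBits;

  // Already a signed 12-bit immediate: leave it to the generic code.
  int64_t AsSigned = static_cast<int64_t>(Shrunk);
  if (AsSigned >= -2048 && AsSigned <= 2047)
    return false;

  // Set every bit from bit 11 upward so the mask becomes sign-extended.
  uint64_t Candidate = Shrunk | ~uint64_t(0x7FF);

  // Legal only if every bit set in the candidate is either set in the
  // original mask or not demanded by the AND's users.
  if ((Candidate & ~(Mask | ~DemandedBits)) != 0)
    return false;

  NewMask = Candidate;
  return true;
}

int main() {
  // Mirrors frame-info.ll on RV64: mask (1 << 33) - 16, with only the low 33
  // bits of the AND demanded because the other operand is known zero above
  // bit 32 (a zero-extended i32 plus 15).
  uint64_t Mask = (uint64_t(1) << 33) - 16;
  uint64_t Demanded = (uint64_t(1) << 33) - 1;
  uint64_t NewMask = 0;
  if (shrinkAndMask(Mask, Demanded, NewMask))
    std::printf("0x%llx -> 0x%llx (andi imm %lld)\n",
                (unsigned long long)Mask, (unsigned long long)NewMask,
                (long long)(int64_t)NewMask);
  return 0;
}

Run on the example values, the sketch reports 0x1fffffff0 -> 0xfffffffffffffff0 (andi imm -16), which is why the four-instruction constant materialization in the old output collapses to a single andi in the updated checks. The real hook additionally returns true without rewriting when the widened mask equals the original constant, so the generic ShrinkDemandedConstant path does not undo the target's choice.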