diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -119,6 +119,8 @@
       return false;
     return true;
   }
+  bool isDesirableToCommuteWithShift(const SDNode *N,
+                                     CombineLevel Level) const override;
 
 private:
   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -841,6 +841,24 @@
   return SDValue();
 }
 
+bool RISCVTargetLowering::isDesirableToCommuteWithShift(
+    const SDNode *N, CombineLevel Level) const {
+  // The following folds are only desirable if constant `c1` cannot fit into an
+  // immediate:
+  //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+  // Otherwise the fold trades an immediate operand for a materialised c1 << c2.
+  SDValue N0 = N->getOperand(0);
+  if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) {
+    SDValue C1 = N0->getOperand(1);
+    // Only a constant operand can be checked against the immediate range.
+    if (auto *Const = dyn_cast<ConstantSDNode>(C1))
+      return !isLegalAddImmediate(Const->getSExtValue());
+  }
+  // No add/or-with-constant operand: report the commute as desirable.
+  return true;
+}
+
 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
     unsigned Depth) const {
diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+; These test that constant adds are not moved after shifts by DAGCombine,
+; if the constant can fit into an immediate.
+;
+; Materialising the large (shifted) constant produced for the new add
+; uses an extra register, and takes several instructions. It is more
+; efficient to perform the add before the shift if the constant to be
+; added fits into an immediate.
+
+define signext i32 @add_small_const(i32 signext %a) nounwind {
+; RV32I-LABEL: add_small_const:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi a0, a0, 1
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: srai a0, a0, 24
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: add_small_const:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi a0, a0, 1
+; RV64I-NEXT: slli a0, a0, 56
+; RV64I-NEXT: srai a0, a0, 56
+; RV64I-NEXT: ret
+  %1 = add i32 %a, 1
+  %2 = shl i32 %1, 24
+  %3 = ashr i32 %2, 24
+  ret i32 %3
+}
+
+; NOTE: This add constant does not fit into an add immediate, so we allow the
+; transformation to fire. However, this introduces a second left shift (see
+; the RV64I output), which we wouldn't need if we did the add before the shl.
+define signext i32 @add_large_const(i32 signext %a) nounwind {
+; RV32I-LABEL: add_large_const:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: lui a1, 65520
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: add_large_const:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: lui a1, 1
+; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: slli a1, a1, 48
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: ret
+  %1 = add i32 %a, 4095
+  %2 = shl i32 %1, 16
+  %3 = ashr i32 %2, 16
+  ret i32 %3
+}
+
+; NOTE: This add constant does not fit into an add immediate, so we allow the
+; transformation to fire. However, this introduces a second left shift (see
+; the RV64I output), which we wouldn't need if we did the add before the shl.
+define signext i32 @add_huge_const(i32 signext %a) nounwind {
+; RV32I-LABEL: add_huge_const:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 16
+; RV32I-NEXT: lui a1, 524272
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: add_huge_const:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 48
+; RV64I-NEXT: lui a1, 8
+; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: slli a1, a1, 48
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: ret
+  %1 = add i32 %a, 32767
+  %2 = shl i32 %1, 16
+  %3 = ashr i32 %2, 16
+  ret i32 %3
+}